## Prime Numbers

In [1]:
[i for i in range(2, 20, 2)]

[2, 4, 6, 8, 10, 12, 14, 16, 18]

In [2]:
[j for i in range(2, 4) for j in range(i*2, 20, i)]

[4, 6, 8, 10, 12, 14, 16, 18, 6, 9, 12, 15, 18]

Find the prime numbers up to n=20.  
*2, 3, 5, 7, 11, 13, 17, 19*  
Consider only the multiples of numbers up to the square root of n ([Sieve of Eratosthenes](https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes)).

In [3]:
from math import sqrt
n = 20

In [4]:
# Multiples of every i up to AND INCLUDING int(sqrt(n)); without the "+ 1"
# the square root itself is skipped (e.g. 9 = 3*3 is missed when n = 10).
{j for i in range(2, int(sqrt(n)) + 1) for j in range(i*2, n, i)}

{4, 6, 8, 9, 10, 12, 14, 15, 16, 18}

In [5]:
# Composite numbers below n: every multiple (from 2*i upwards) of each i in
# 2..int(sqrt(n)). The upper bound needs "+ 1" because range() excludes its
# stop value; otherwise squares such as 9 (n = 10) or 25 (n = 26) are missed.
no_primes = {j for i in range(2, int(sqrt(n)) + 1) for j in range(i*2, n, i)}
no_primes

{4, 6, 8, 9, 10, 12, 14, 15, 16, 18}

In [6]:
# Whatever is left in 2..n-1 after removing the crossed-out multiples is prime
primes = set(range(2, n)) - no_primes
primes

{2, 3, 5, 7, 11, 13, 17, 19}

## Recursive Functions

In [7]:
def do_something(n):
    """Return n * (n - 1) * ... recursively; the result is 1 for any n <= 1."""
    return 1 if n <= 1 else n * do_something(n - 1)

In [8]:
do_something(4)

24

In [9]:
import numpy as np

Create all possible combinations of a given length from the values in an array.

In [10]:
def combine(array, length):
    """Return every combination of `length` elements taken from `array`.

    Inside each combination the elements keep the relative order they have
    in `array`.

    Parameters
    ----------
    array : array-like
        One-dimensional sequence of values to choose from.
    length : int
        Number of elements in each combination.

    Returns
    -------
    list of numpy.ndarray
        One array per combination. Contains a single empty array when
        `length` is 0 and is empty when `array` has fewer than `length`
        elements.
    """
    array = np.asarray(array)

    if length == 0:
        # Base case: there is exactly one way to pick nothing. Use the input
        # dtype so the empty result matches the arrays built for length > 0
        # (np.empty(0) alone would always be float64).
        return [np.empty(0, dtype=array.dtype)]

    combinations = []

    for i, element in enumerate(array):
        # Only the elements after position i may follow `element`, which
        # prevents the same selection from appearing in a different order.
        for comb in combine(array[i + 1:], length - 1):
            combinations.append(np.array([element, *comb]))

    return combinations

In [11]:
arr = np.array([1, 2, 3, 4, 5])

In [12]:
combine(arr, 3)

[array([1, 2, 3]),
 array([1, 2, 4]),
 array([1, 2, 5]),
 array([1, 3, 4]),
 array([1, 3, 5]),
 array([1, 4, 5]),
 array([2, 3, 4]),
 array([2, 3, 5]),
 array([2, 4, 5]),
 array([3, 4, 5])]

### Reshape Usage

In [13]:
# The integers 1..12 as a flat array, then arranged as 4 rows x 3 columns
arr = np.arange(1, 13)
newarr = np.reshape(arr, (4, 3))

In [14]:
newarr

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [15]:
# Same 12 values as 2 blocks of 3 rows x 2 columns
newarr = np.reshape(arr, (2, 3, 2))

In [16]:
newarr

array([[[ 1,  2],
        [ 3,  4],
        [ 5,  6]],

       [[ 7,  8],
        [ 9, 10],
        [11, 12]]])

In [17]:
# -1 lets numpy infer the remaining dimension from the number of elements
newarr = np.reshape(arr, (2, -1))

In [18]:
newarr

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]])

In [19]:
newarr.reshape(-1)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

## Roman2Decimal

Convert a number from Roman numerals to decimal.

In [20]:
def roman2decimal(roman):
    """Convert a Roman numeral string to its integer value.

    A symbol is subtracted when the symbol to its right is larger
    (e.g. the ``I`` in ``IV``); otherwise it is added. A symbol that is
    missing from the lookup table raises ``KeyError``.
    """
    symbol_values = dict(I=1, V=5, X=10, L=50, C=100, D=500, M=1000)

    # Values read right to left, so that each one can be paired with the
    # value handled just before it (its right-hand neighbour, 0 for the
    # rightmost symbol).
    right_to_left = [symbol_values[symbol] for symbol in reversed(roman)]
    previous_values = [0] + right_to_left[:-1]

    return sum(
        current if current >= previous else -current
        for previous, current in zip(previous_values, right_to_left)
    )

In [21]:
roman2decimal('MCMXCVII')

1997

## Pandas

*The dataset used in the next section can be retrieved from [here](https://www.kaggle.com/datasets/kaggle/us-baby-names).*

In [22]:
import pandas as pd

In [25]:
# Load the baby-names dataset from the CSV file in the working directory
csv_path = 'StateNames.csv'
names = pd.read_csv(csv_path)

In [26]:
# Show the first five rows
names.head()

Unnamed: 0,Id,Name,Year,Gender,State,Count
0,1,Mary,1910,F,AK,14
1,2,Annie,1910,F,AK,12
2,3,Anna,1910,F,AK,10
3,4,Margaret,1910,F,AK,8
4,5,Helen,1910,F,AK,7


In [27]:
# Get rid of the first column ('Id').
# errors='ignore' keeps the cell re-runnable: once 'Id' has been removed,
# running the cell again is a no-op instead of raising KeyError.
names = names.drop(columns='Id', errors='ignore')

In [28]:
# Transform 'Year' from int to string
# (later cells select years with string labels such as '2000' or "1919")
names['Year'] = names['Year'].astype(str)

In [29]:
# how many females and males are in the dataset
# Select 'Count' explicitly: the first positional argument of GroupBy.sum
# is `numeric_only`, not a column name, so sum('Count') only worked by
# accident of the string being truthy.
names.groupby('Gender')[['Count']].sum()

Unnamed: 0_level_0,Count
Gender,Unnamed: 1_level_1
F,143770075
M,155113251


In [30]:
# how many rows for each gender
names['Gender'].value_counts()

F    3154009
M    2493417
Name: Gender, dtype: int64

In [31]:
# for each name, retrieve the total number of children and
# sort the values in descending order
# ('Count' is selected explicitly; GroupBy.sum takes `numeric_only` as its
# first positional argument, not a column name)
names.groupby('Name')[['Count']].sum().sort_values('Count', ascending=False)

Unnamed: 0_level_0,Count
Name,Unnamed: 1_level_1
James,4957166
John,4845414
Robert,4725713
Michael,4312975
William,3839236
...,...
Lynleigh,5
Lynee,5
Lynea,5
Lyndie,5


In [32]:
# for each year, sum of female newborns in California
# Restrict the aggregation to 'Count' so the text columns (Name, Gender,
# State) are never summed, whatever the pandas version does with them.
names[(names['State'] == 'CA') & (names['Gender'] == 'F')].groupby('Year')[['Count']].sum()

Unnamed: 0_level_0,Count
Year,Unnamed: 1_level_1
1910,5950
1911,6602
1912,9803
1913,11860
1914,13814
...,...
2010,210587
2011,207102
2012,208355
2013,204089


In [33]:
# for each state, mean of newborns called John
# Restrict the aggregation to 'Count': a mean is not defined for the text
# columns (Name, Year, Gender), so they must not reach mean().
names[names['Name'] == 'John'].groupby('State')[['Count']].mean()

Unnamed: 0_level_0,Count
State,Unnamed: 1_level_1
AK,67.52381
AL,597.478261
AR,380.350746
AZ,284.759259
CA,1758.548023
CO,406.212389
CT,622.046875
DC,316.583333
DE,144.066667
FL,736.890323


In [34]:
# for each state and year, what fraction of the newborns is called 'John'?

In [35]:
# Total newborns per (State, Year); only 'Count' is aggregated so the
# result holds a single numeric column regardless of the pandas version.
total_newborns = names.groupby(['State', 'Year'])[['Count']].sum()

In [36]:
# Newborns called John per (State, Year); only 'Count' is aggregated so the
# result holds a single numeric column regardless of the pandas version.
johns = names[names['Name'] == 'John'].groupby(['State', 'Year'])[['Count']].sum()

In [37]:
(johns/total_newborns).sort_values('Count', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
State,Year,Unnamed: 2_level_1
AK,1911,0.178571
AK,1913,0.172727
NV,1912,0.158940
NV,1910,0.149254
NV,1911,0.129412
...,...,...
UT,2005,0.002088
CA,2014,0.002078
CA,2011,0.002074
CA,2012,0.002064


### Multi-Index Usage

In [38]:
# Share of Johns among all newborns, aligned on the (State, Year) index
johns_perc = johns.div(total_newborns)

In [39]:
names.loc[0:10, 'Year':'Count']

Unnamed: 0,Year,Gender,State,Count
0,1910,F,AK,14
1,1910,F,AK,12
2,1910,F,AK,10
3,1910,F,AK,8
4,1910,F,AK,7
5,1910,F,AK,6
6,1910,F,AK,6
7,1910,F,AK,5
8,1911,F,AK,12
9,1911,F,AK,7


In [40]:
johns_perc.loc['AK':'CA']

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
State,Year,Unnamed: 2_level_1
AK,1910,0.069565
AK,1911,0.178571
AK,1912,0.113475
AK,1913,0.172727
AK,1914,0.069388
...,...,...
CA,2010,0.002160
CA,2011,0.002074
CA,2012,0.002064
CA,2013,0.002118


In [41]:
johns_perc.xs(('AK', '2000'), level=['State', 'Year'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
State,Year,Unnamed: 2_level_1
AK,2000,0.00875


In [42]:
johns_perc.loc['AK':'CA'].xs(slice('2000', '2010'), level='Year', drop_level=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
State,Year,Unnamed: 2_level_1
AK,2000,0.00875
AK,2001,0.007114
AK,2002,0.006939
AK,2003,0.007177
AK,2004,0.0068
AK,2005,0.007838
AK,2006,0.007169
AK,2007,0.006576
AK,2008,0.006238
AK,2009,0.004141


In [43]:
johns_perc.query('State=="AK" and Year=="1919"')

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
State,Year,Unnamed: 2_level_1
AK,1919,0.057971


In [44]:
johns_perc.query('State in ["AK", "CA", "FL"] and Year in ["1990", "1991", "1992"]')

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
State,Year,Unnamed: 2_level_1
AK,1990,0.008174
AK,1991,0.011113
AK,1992,0.010624
CA,1990,0.00528
CA,1991,0.004965
CA,1992,0.004874
FL,1990,0.008183
FL,1991,0.008349
FL,1992,0.007484


## Bagels

Recreate the game explained [here](https://sites.math.washington.edu//~mathcircle/mmc/mmc2010/PicoFermiBagel.pdf).

In [45]:
class Bagels():
    """Console version of the Pico-Fermi-Bagels number guessing game.

    The player tries to guess a secret sequence of digits. After each guess
    a clue is printed: 'Fermi' for every digit that is correct and in the
    right position, 'Pico' for every digit that is in the wrong position but
    occurs in the secret number, or 'Bagels' when no digit matches at all.
    """

    def __init__(self, num_digits, max_guesses):
        """Store the length of the secret number and the number of attempts."""
        self.num_digits = num_digits
        self.max_guesses = max_guesses

    def _get_secret_num_(self, num_digits):
        """Return `num_digits` random digits in 0..9 (repeats possible) as an array."""
        return np.random.randint(0, 10, num_digits)

    def _check_num_(self, num, guess):
        """Return the clue string for `guess` compared with the secret `num`."""
        if np.array_equal(guess, num):
            return 'You got it!'

        clues = []
        for i in range(len(num)):
            if guess[i] == num[i]:
                clues.append('Fermi')
            elif guess[i] in num:
                clues.append('Pico')

        if len(clues) == 0:
            return 'Bagels'

        return ', '.join(clues)

    def _get_guess_(self):
        """Prompt until the player enters exactly `num_digits` digits.

        Returns the digits as an integer array. Invalid input (wrong length
        or non-digit characters) is rejected with a message and the prompt is
        repeated, so a typo does not crash the game.
        """
        while True:
            raw = input('Enter your guess:').strip()
            # isdecimal() guarantees that int() accepts every character
            if len(raw) == self.num_digits and raw.isdecimal():
                return np.array([int(digit) for digit in raw])
            print(f'Please enter exactly {self.num_digits} digits.')

    def play(self):
        """Play one game: ask for guesses until a win or `max_guesses` attempts."""
        secret_num = self._get_secret_num_(self.num_digits)

        # for/else: the else branch runs only when the loop finished without
        # a break, i.e. every allowed attempt was used and none was correct.
        for _ in range(self.max_guesses):
            guess = self._get_guess_()
            res = self._check_num_(secret_num, guess)
            print(res)
            if res == 'You got it!':
                break
        else:
            print('You lost.')

In [None]:
# A 3-digit secret number and at most 10 attempts
game = Bagels(num_digits=3, max_guesses=10)

In [None]:
game.play()