# Python Collection


## 1) Dictionary with str Index

In [4]:
import pandas as pd

# index_col="Pokemon" turns the Pokemon name into the index (the "key").
# The frame that is left has a single column, so .squeeze("columns") collapses
# it into a Series, i.e. a key -> value mapping.
# read_csv(squeeze=True) was deprecated in pandas 1.4 and removed in 2.0, so the
# squeeze is applied to the result of read_csv instead.
# sort_index() is chained (not inplace) so re-running the cell is idempotent.
df_pokemon = (
    pd.read_csv("data//pokemon.csv", index_col="Pokemon")
    .squeeze("columns")
    .sort_index()
)
df_pokemon.head(8)

Pokemon
Abomasnow       Grass
Abra          Psychic
Absol            Dark
Accelgor          Bug
Aegislash       Steel
Aerodactyl       Rock
Aggron          Steel
Aipom          Normal
Name: Type, dtype: object

In [5]:
# Label-based lookups on the string index: an inclusive label slice, then an
# explicit list of labels. .loc states that both are label lookups.
df_pokemon.loc["Charmander":"Cherubi"], df_pokemon.loc[["Ivysaur", "Charizard"]]

(Pokemon
 Charmander      Fire
 Charmeleon      Fire
 Chatot        Normal
 Cherrim        Grass
 Cherubi        Grass
 Name: Type, dtype: object, Pokemon
 Ivysaur      Grass
 Charizard     Fire
 Name: Type, dtype: object)

In [7]:
# Series.get returns its `default` (None) when the lookup fails, and on
# pandas 1.0+ a list that contains any unknown label fails as a whole.
# reindex is the lookup that keeps the labels that exist and fills the
# missing ones ("Digimon") with NaN.
df_pokemon.reindex(["Charizard", "Digimon"])

Pokemon
Charizard    Fire
Digimon       NaN
Name: Type, dtype: object

## 2) Aggregation Methods

In [8]:
df_pokemon.count() # excludes NaN values, whereas len(df_pokemon) counts every row

721

In [29]:
# Collapse the one-column frame into a Series of prices.
# read_csv(squeeze=True) was removed in pandas 2.0; .squeeze("columns") on the
# result is the documented replacement.
df_google = pd.read_csv("data//google_stock_price.csv").squeeze("columns")

In [102]:
# Ten rows at positions 1000-1009; .iloc makes the positional slice explicit.
df_google.iloc[1000:1010]

1000    247.26
1001    250.17
1002    251.05
1003    249.76
1004    252.49
1005    254.82
1006    248.90
1007    245.00
1008    242.26
1009    243.02
Name: Stock Price, dtype: float64

In [26]:
df_google.std() # standard deviation of the "Stock Price" values

173.18720477113106

In [11]:
# Summary statistics of the price Series, gathered into one dict for display.
{
    "Median": df_google.median(),
    "Mean": df_google.mean(),
    "Max": df_google.max(),
    "Min": df_google.min(),
    "Index Max": df_google.idxmax(),  # index label holding the highest value
    "Index Min": df_google.idxmin(),  # index label holding the lowest value
}
    

{'Median': 283.315,
 'Mean': 334.31009296148744,
 'Max': 782.22,
 'Min': 49.95,
 'Index Max': 3011,
 'Index Min': 11}

In [12]:
# Count how many Pokemon have each Type (comparable to GROUP BY + COUNT),
# listed from the rarest type to the most common one.
df_pokemon.value_counts(ascending=True)

Flying        3
Fairy        17
Steel        22
Ice          23
Ghost        23
Dragon       24
Fighting     25
Poison       28
Dark         28
Ground       30
Electric     36
Rock         41
Fire         47
Psychic      47
Bug          63
Grass        66
Normal       93
Water       105
Name: Type, dtype: int64

In [13]:
# normalize=True reports each Type as a fraction of all rows instead of a count.
# The first tuple member checks that the fractions add up to 1 (up to
# floating-point rounding); the second is the breakdown itself.
type_share = df_pokemon.value_counts(normalize=True)
type_share.sum(), type_share

(0.9999999999999999, Water       0.145631
 Normal      0.128988
 Grass       0.091540
 Bug         0.087379
 Psychic     0.065187
 Fire        0.065187
 Rock        0.056865
 Electric    0.049931
 Ground      0.041609
 Dark        0.038835
 Poison      0.038835
 Fighting    0.034674
 Dragon      0.033287
 Ghost       0.031900
 Ice         0.031900
 Steel       0.030513
 Fairy       0.023578
 Flying      0.004161
 Name: Type, dtype: float64)

In [14]:
 #mode is a series
# Evaluate mode() once and reuse the result for both tuple members.
price_modes = df_google.mode()
type(price_modes), price_modes

(pandas.core.series.Series, 0    291.21
 dtype: float64)

## 2) Apply

In [36]:
def CheckPerformance(val):
    """Map a numeric value to a rating label.

    Returns "Poor" below 230, "Okay" from 230 up to (not including) 250,
    "Nice" from 270 through 300 inclusive, and "Great" for everything else.

    NOTE(review): "everything else" covers values above 300 and also the
    250-270 gap between "Okay" and "Nice" -- confirm that the gap is really
    meant to rate as "Great".
    """
    if val < 230:
        return "Poor"
    if val < 250:
        return "Okay"
    if 270 <= val <= 300:
        return "Nice"
    # Fallback: anything not matched by the three bands above.
    return "Great"
    
# Rate each of the ten prices at positions 1000-1009, one value at a time.
df_google.iloc[1000:1010].apply(CheckPerformance)

1000     Okay
1001    Great
1002    Great
1003     Okay
1004    Great
1005    Great
1006     Okay
1007     Okay
1008     Okay
1009     Okay
Name: Stock Price, dtype: object

## 3) Apply - Lambda

In [42]:
# The lambda body had been left as a dangling `if`, which is a SyntaxError.
# The output recorded for this cell is each price multiplied by 100, so that
# is the expression restored here.
df_google[1000:1010].apply(lambda stock_price: stock_price * 100)

1000    24726.0
1001    25017.0
1002    25105.0
1003    24976.0
1004    25249.0
1005    25482.0
1006    24890.0
1007    24500.0
1008    24226.0
1009    24302.0
Name: Stock Price, dtype: float64

In [66]:
# reduce folds the (key, value) pairs one by one into a single dict:
# every pair whose key is below 7 is stored swapped, as value -> key.
from functools import reduce


def _swap_into(acc, pair):
    """Store value -> key in acc when key < 7; always hand acc back to reduce."""
    key, value = pair
    if key < 7:
        acc[value] = key
    return acc


# The empty dict is passed as reduce's initializer (the starting accumulator).
reduce(_swap_into, {1: 2, 3: 4, 5: 6, 7: 8}.items(), {})

{2: 1, 4: 3, 6: 5}

## 4) Map - JOIN / MERGE

In [80]:
# Left-hand frame for the join/merge demo: player name and age only.
# The squeeze=True keyword is dropped: it was removed in pandas 2.0 and never
# applied here anyway, because two columns cannot be squeezed into a Series.
df_nba1 = pd.read_csv("data//nba.csv", usecols=["Name", "Age"])
df_nba1.head(5)

Unnamed: 0,Name,Age
0,Avery Bradley,25.0
1,Jae Crowder,25.0
2,John Holland,27.0
3,R.J. Hunter,22.0
4,Jonas Jerebko,29.0


In [90]:
# Right-hand frame for the join/merge demo: player name, weight and height.
# The squeeze=True keyword is dropped: it was removed in pandas 2.0 and never
# applied here anyway, because three columns cannot be squeezed into a Series.
df_nba2 = pd.read_csv("data//nba.csv", usecols=["Name", "Weight", "Height"])
# NOTE(review): disabled experiment -- presumably alters Name so the merge
# below no longer finds matches; confirm the intent or delete the line.
#df_nba2["Name"] = df_nba2["Name"] + '*'
df_nba2.head(5)

Unnamed: 0,Name,Height,Weight
0,Avery Bradley,6-2,180.0
1,Jae Crowder,6-6,235.0
2,John Holland,6-5,205.0
3,R.J. Hunter,6-5,185.0
4,Jonas Jerebko,6-10,231.0


In [91]:
# No `on` key is given, so join lines the two frames up by their index.
# Both frames carry a Name column; the right-hand copy gets the "_right" suffix.
(
    df_nba1
    .join(df_nba2, how="left", lsuffix="", rsuffix="_right")
    .head(10)
)

Unnamed: 0,Name,Age,Name_right,Height,Weight
0,Avery Bradley,25.0,Avery Bradley,6-2,180.0
1,Jae Crowder,25.0,Jae Crowder,6-6,235.0
2,John Holland,27.0,John Holland,6-5,205.0
3,R.J. Hunter,22.0,R.J. Hunter,6-5,185.0
4,Jonas Jerebko,29.0,Jonas Jerebko,6-10,231.0
5,Amir Johnson,29.0,Amir Johnson,6-9,240.0
6,Jordan Mickey,21.0,Jordan Mickey,6-8,235.0
7,Kelly Olynyk,25.0,Kelly Olynyk,7-0,238.0
8,Terry Rozier,22.0,Terry Rozier,6-2,190.0
9,Marcus Smart,22.0,Marcus Smart,6-4,220.0


In [92]:
# Name is the only column the two frames have in common, so it is the key the
# merge matches on; on="Name" spells that key out.
df_nba1.merge(df_nba2, how="left", on="Name").head(10)

Unnamed: 0,Name,Age,Height,Weight
0,Avery Bradley,25.0,6-2,180.0
1,Jae Crowder,25.0,6-6,235.0
2,John Holland,27.0,6-5,205.0
3,R.J. Hunter,22.0,6-5,185.0
4,Jonas Jerebko,29.0,6-10,231.0
5,Amir Johnson,29.0,6-9,240.0
6,Jordan Mickey,21.0,6-8,235.0
7,Kelly Olynyk,25.0,7-0,238.0
8,Terry Rozier,22.0,6-2,190.0
9,Marcus Smart,22.0,6-4,220.0
