In [None]:
import numpy as np
import pandas as pd

<h1>Series</h1>

In [None]:
# A Series built from a plain list gets the default 0..n-1 integer index.
# Show, in order: the Series, its raw values, its index, the element at
# label 1, and the first two elements.
s = pd.Series([1, 2, 3, 4, 5])
print(
    s,
    s.values,
    s.index,
    s[1],
    s[:2],
    sep="\n",
)

In [None]:
# Same values, but with explicit string labels "A".."E" as the index.
s = pd.Series([1, 2, 3, 4, 5], index=list("ABCDE"))
print(
    s,
    s["C"],                # single label
    s[["C", "D", "E"]],    # list of labels
    "B" in s,              # membership is tested against the index labels
    sep="\n",
)

In [None]:
# Series whose explicit index is itself made of integers (2, 4, ..., 10), so
# label-based and position-based access give visibly different results.
s = pd.Series([1,2,3,4,5],
              index=[2,4,6,8,10])
print(s)
# A scalar integer key is looked up as a label here: label 4 holds the value 2.
print(s[4])
# NOTE(review): integer slices through [] look positional here (unlike the
# scalar lookup above) - confirm against the pandas version in use, and prefer
# explicit .loc / .iloc to avoid the ambiguity.
print(s[2:])
print(s[2:4])
print(s.unique())
print(s.value_counts())
print(s.isin([2, 3]))
print(s.iloc[1]) # .iloc selects by position (0-based), ignoring the labels assigned above
print(s.iloc[0:2])
print(s.reindex(range(20))) # relabel to 0..19; labels that did not exist before get NaN
print(s.reindex(range(20), method="bfill")) # backward fill: a missing label takes the value of the NEXT existing label; labels past the last one stay NaN

In [None]:
# Float Series labelled "A".."E": inspect it, append an entry, then slice and
# filter it.
s = pd.Series([0, 0.25, 0.5, 0.75, 1.0], index=list("ABCDE"))
print(s, s.keys(), list(s.items()), sep="\n")

# Assigning to a label that does not exist yet adds a new entry.
s["F"] = 1.25

print(
    s,
    s["A":"B"],                      # label slice
    s[0:4],                          # integer slice
    s[s > 0.4],                      # boolean mask
    s[(s > 0.4) & (s < 0.8)],        # combined masks need & and parentheses
    sep="\n",
)

<h1>DataFrame</h1>

In [None]:
# A DataFrame built from a list of dicts: each dict is one row, the union of
# all keys becomes the columns, and keys missing from a row show up as NaN.
table = pd.DataFrame(
    [
        dict(A=2, B=4, D=3),
        dict(A=4, B=7, C=2),
        dict(A=10, B=9, E=6),
    ]
)

print(table)

In [None]:
# 5x5 table of uniform random numbers in [0, 1) with explicit row labels 1..5
# and column labels "A".."E". No seed is set, so values differ on every run.
random_table = pd.DataFrame(
    np.random.rand(5, 5),
    index=[1, 2, 3, 4, 5],
    columns=list("ABCDE"),
)
print(random_table)

In [None]:
# Despite its name, pop_tuple is a dict mapping place name -> population.
pop_tuple = dict(
    zip(
        ("London", "Hamford", "New Ham", "Hammersmith", "Startford"),
        (139781209, 21312412, 12312424, 123451241, 12314123),
    )
)
print(pop_tuple)

<h1>DataFrame Indexing</h1>

In [None]:
# Three dict-backed Series (total / male / female population per place) are
# combined into one DataFrame. The column names are Korean:
#   인구수 = population, 남자인구수 = male population, 여자인구수 = female population.
pop_tuple = {
    "London": 139781209,
    "Hamford": 21312412,
    "New Ham": 12312424,
    "Hammersmith": 123451241,
    "Startford": 12314123,
}
population = pd.Series(pop_tuple)
print(
    population,
    population["London"],              # single label
    population["London":"New Ham"],    # label slice
    sep="\n",
)

male_tuple = {
    "London": 41231,
    "Hamford": 31231,
    "New Ham": 31232,
    "Hammersmith": 32311,
    "Startford": 312321,
}
male_population = pd.Series(male_tuple)
print(male_population)

female_tuple = {
    "London": 54542,
    "Hamford": 32131,
    "New Ham": 6536,
    "Hammersmith": 53453,
    "Startford": 653453,
}
female_population = pd.Series(female_tuple)
print(female_population)

# Each Series becomes one column; rows are aligned on the shared place labels.
korea_df = pd.DataFrame(
    {
        "인구수": population,
        "남자인구수": male_population,
        "여자인구수": female_population,
    }
)

print(
    korea_df,
    korea_df.index,
    korea_df.columns,
    korea_df["남자인구수"],             # one column
    korea_df["London":"New Ham"],      # a slice inside [] selects rows
    korea_df["여자인구수"] > 10000,     # element-wise comparison -> boolean Series
    korea_df["인구수"] < 10000,
    sep="\n",
)

In [None]:
# Derived columns and row/column selection on korea_df (built in the previous
# cell). Column names are Korean: 인구수 = population, 남자인구수 = male
# population, 여자인구수 = female population, 남자비율 = male ratio,
# 여자비율 = female ratio.
print(korea_df)
print(korea_df.남자인구수)  # attribute access is shorthand for korea_df["남자인구수"]
print(korea_df.여자인구수)

# Male ratio: males per 100 females.
korea_df["남자비율"] = (korea_df["남자인구수"] * 100 / korea_df["여자인구수"])
print(korea_df.남자비율)

# Female ratio: females per 100 males (same computation, attribute-style access).
korea_df["여자비율"] = (korea_df.여자인구수 * 100 / korea_df.남자인구수)
print(korea_df.여자비율)

print(korea_df.values)

print("")

print(korea_df.values[0])  # first row of the underlying array (London)

print(korea_df.T)  # transpose: rows and columns swapped

print(korea_df.loc[(korea_df.여자인구수 < 1000)])
print(korea_df.loc[(korea_df.인구수 > 20000)])
# Label slices on both axes; with .loc both end points are included.
print(korea_df.loc[:"New Ham", :"남자인구수"])
print(korea_df.loc[korea_df.남자비율 > 100])  # .loc accepts a boolean Series and keeps the True rows
print(korea_df.남자비율 > 100)  # the mask itself: one True/False per row
# .iloc is purely positional and refuses a boolean *Series* as a mask (it
# raises instead of aligning on labels), so hand it the underlying boolean
# array to get the same rows as the .loc call above.
print(korea_df.iloc[(korea_df.남자비율 > 100).values])
print(korea_df.loc[(korea_df.인구수 > 25000) & (korea_df.남자비율 > 100)])  # combine masks with &

<h1>Index</h1>

In [None]:
# An Index behaves like an immutable array: it supports element access,
# array-style attributes and slicing.
idx = pd.Index([2, 4, 6, 8, 10])
print(
    idx,
    idx[1],
    idx.size,
    idx.shape,
    idx.ndim,
    idx.dtype,
    idx[1:4],
    idx[1:2:2],
    idx[::2],
    sep="\n",
)

In [None]:
# Set-style operations between two Index objects.
idx1 = pd.Index([1,2,4,6,8])
idx2 = pd.Index([2,4,5,6,7])
print(idx1.append(idx2))        # plain concatenation, duplicates kept
print(idx1.difference(idx2))    # labels in idx1 that are not in idx2
# NOTE: on current pandas the - operator is element-wise arithmetic
# (idx1[i] - idx2[i]), not a set difference; use .difference() for that.
print(idx1 - idx2)
print(idx1.intersection(idx2))  # labels present in both
# Union of the two indexes defined in this cell. This used to read `idx`,
# a variable left over from an earlier cell, which made the result depend on
# hidden kernel state.
print(idx1.union(idx2))
print(idx1.delete(0))           # remove by position
print(idx1.drop(2))             # remove by label

<h1>Multi Indexing</h1>

In [None]:
# (borough, year) pairs: every borough appears once for 2010 and once for
# 2011, in that order.
idx_tuples = [
    (borough, year)
    for borough in ("Greenwich", "Kingston", "Newham",
                    "Hackney", "Tooting", "Hammersmith")
    for year in (2010, 2011)
]

print(idx_tuples)

# One population figure per (borough, year) pair, in the same order as above.
pop_tuples = [
    10321231, 93120931,   # Greenwich
    32131233, 32132133,   # Kingston
    31232131, 32132144,   # Newham
    31234235, 43538765,   # Hackney
    65634112, 42363265,   # Tooting
    12312412, 65745845,   # Hammersmith
]

population = pd.Series(pop_tuples, index=idx_tuples)
print(population)

In [None]:
# Build an explicit two-level MultiIndex from the (borough, year) tuples and
# relabel the population Series with it.
midx = pd.MultiIndex.from_tuples(idx_tuples)
print(midx)

population = population.reindex(midx)
print (population)

# With a MultiIndex, one key per level can be given inside the brackets.
print(population[: , 2010])  # every borough, year 2010 only
print(population["Greenwich", :])  # every year for Greenwich
print(population["Greenwich", 2010])  # a single value

In [None]:
# unstack() moves the innermost index level (the year) into the columns,
# turning the multi-indexed Series into a DataFrame with one row per borough.
uk_mdf = population.unstack()
print(uk_mdf)

# stack() is the inverse operation, but it is a DataFrame method (a Series has
# no .stack()), so it must be called on the unstacked frame rather than on
# `population`. The result is a multi-indexed Series again.
uk_mdf = uk_mdf.stack()

In [71]:
# Male population figures, one per row of `population`, in the same order.
male_tuples = [1231535, 123156154,
             1231563, 15648912,
             1231561, 12135614,
             1531584, 48946351,
             4894165, 48949849,
             41565, 4894123]
print(male_tuples)

# Build a DataFrame with the total population (a Series, which supplies the
# row index) and the male figures (a plain list, matched to the rows by position).
korea_mdf = pd.DataFrame({"Total_Population" : population,
                         "Male_Population" : male_tuples})

print(korea_mdf)

# Female population figures, again one per row and in the same order.
female_tuples = [3231535, 123156154,
             145563, 15648912,
             231561, 12135614,
             131584, 43351,
             4894165, 48949849,
             41565, 4894123]
print(female_tuples)

# Rebuild the DataFrame with an extra column for the female figures.
korea_mdf = pd.DataFrame({"Total_Population" : population,
                         "Male_Population" : male_tuples,
                         "Female_Population" : female_tuples})

print(korea_mdf)

[1231535, 123156154, 1231563, 15648912, 1231561, 12135614, 1531584, 48946351, 4894165, 48949849, 41565, 4894123]
                   Total_Population  Male_Population
Borough     Years                                   
Greenwich   2010           10321231          1231535
            2011           93120931        123156154
Kingston    2010           32131233          1231563
            2011           32132133         15648912
Newham      2010           31232131          1231561
            2011           32132144         12135614
Hackney     2010           31234235          1531584
            2011           43538765         48946351
Tooting     2010           65634112          4894165
            2011           42363265         48949849
Hammersmith 2010           12312412            41565
            2011           65745845          4894123
[3231535, 123156154, 145563, 15648912, 231561, 12135614, 131584, 43351, 4894165, 48949849, 41565, 4894123]
                   Total_Population  M

In [None]:
# Male / female figures, one per row of `population`, in the same order.
male_tuples = [
    1231535, 123156154,
    1231563, 15648912,
    1231561, 12135614,
    1531584, 48946351,
    4894165, 48949849,
    41565, 4894123,
]

female_tuples = [
    3231535, 123156154,
    145563, 15648912,
    231561, 12135614,
    131584, 43351,
    4894165, 48949849,
    41565, 4894123,
]
print(female_tuples)

# Ratios are computed from the columns of the existing korea_mdf:
# males per 100 females, and females per 100 males.
male_ratio = korea_mdf["Male_Population"].mul(100).div(korea_mdf["Female_Population"])
female_ratio = korea_mdf["Female_Population"].mul(100).div(korea_mdf["Male_Population"])

# Rebuild the frame with the two ratio columns added.
korea_mdf = pd.DataFrame(
    {
        "Total_Population": population,
        "Male_Population": male_tuples,
        "Female_Population": female_tuples,
        "Male_Ratio": male_ratio,
        "Female_Ratio": female_ratio,
    }
)
print(korea_mdf)

In [79]:
# A list of outer-level labels selects every row for those boroughs.
print(population[["Greenwich", "Hackney"]])
# A boolean mask keeps the rows where the condition is True.
print(population[population > 3000])

Borough    Years
Greenwich  2010     10321231
           2011     93120931
Hackney    2010     31234235
           2011     43538765
dtype: int64
Borough      Years
Greenwich    2010     10321231
             2011     93120931
Kingston     2010     32131233
             2011     32132133
Newham       2010     31232131
             2011     32132144
Hackney      2010     31234235
             2011     43538765
Tooting      2010     65634112
             2011     42363265
Hammersmith  2010     12312412
             2011     65745845
dtype: int64


In [None]:
# Random 6x3 table with a two-level row index: the two inner lists passed as
# `index` become the outer (letter) and inner (number) levels.
# `columns` is a flat list on purpose: wrapping it in another list (as this
# cell previously did) makes pandas build a one-level MultiIndex of columns
# instead of plain column labels.
df = pd.DataFrame(np.random.rand(6, 3),
                  index=[["a", "a", "b", "b", "c", "c"], [1, 2, 1, 2, 1, 2]],
                  columns=["c1", "c2", "c3"])

print(df)

In [None]:
# from_arrays: one list per level; entries at the same position form one key.
pd.MultiIndex.from_arrays([["a","a","b","b","c","c"],[1,2,1,2,1,2]])

In [None]:
# from_tuples: each tuple is one complete (level 0, level 1) key.
pd.MultiIndex.from_tuples([("a", 1), ("a", 2),("b", 1),("b", 2),("c", 1), ("c", 2)])

In [None]:
# from_product: Cartesian product of the level values.
pd.MultiIndex.from_product([["a","b","c"], [1,2]]) # (a,1), (a,2), (b,1), (b,2), (c,1), (c,2)

In [None]:
# Low-level constructor: `levels` lists the distinct values of each level and
# `codes` gives, per row, the position of that row's value inside the
# corresponding levels list (so codes 0,0 -> ("a", 1), codes 2,1 -> ("c", 2)).
pd.MultiIndex(levels=[["a", "b", "c"], [1,2]],
             codes=[[0,0,1,1,2,2], [0,1,0,1,0,1]])

In [None]:
# Show the multi-indexed population Series as it currently stands.
print(population)

In [None]:
# Name the two levels of the row MultiIndex (these label the index levels,
# not the columns); the names then appear as a header when printing.
population.index.names = ["Borough", "Years"]
print(population)

In [None]:
# Creation of a DataFrame that is multi-indexed on BOTH axes.

# Rows: every (letter, number) combination.
idx = pd.MultiIndex.from_product(
    [list("abc"), [1, 2]],
    names=["name1", "name2"],
)

# Columns: every (column group, number) combination.
col = pd.MultiIndex.from_product(
    [["c1", "c2", "c3"], [1, 2]],
    names=["col_name1", "col_name2"],
)

# 6x6 standard-normal samples rounded to two decimals (unseeded, so the values
# change on every run).
data = np.random.randn(6, 6).round(2)
mdf = pd.DataFrame(data, index=idx, columns=col)

print(mdf)

In [82]:
# Selecting from a frame whose columns are a MultiIndex.
print(mdf["c2"])  # outer column label only: both sub-columns of c2
print(mdf["c2", 1])  # full (outer, inner) column key: a single column
print(mdf.iloc[:3, :4]) # .iloc is positional: first 3 rows, first 4 columns
print(mdf.loc[:, ("c2", 1)])  # same column as above, via .loc with a tuple key

col_name2       1     2
name1 name2            
a     1     -1.85  0.61
      2     -1.20 -2.16
b     1     -0.98 -0.66
      2     -2.42  0.59
c     1     -0.47 -0.70
      2     -0.05 -1.93
name1  name2
a      1       -1.85
       2       -1.20
b      1       -0.98
       2       -2.42
c      1       -0.47
       2       -0.05
Name: (c2, 1), dtype: float64
col_name1      c1          c2      
col_name2       1     2     1     2
name1 name2                        
a     1     -1.15  0.91 -1.85  0.61
      2     -0.88 -1.59 -1.20 -2.16
b     1     -0.05  0.01 -0.98 -0.66
name1  name2
a      1       -1.85
       2       -1.20
b      1       -0.98
       2       -2.42
c      1       -0.47
       2       -0.05
Name: (c2, 1), dtype: float64


In [83]:
# Show the multi-indexed population frame before it is sorted.
print(korea_mdf)

                   Total_Population  Male_Population  Female_Population
Borough     Years                                                      
Greenwich   2010           10321231          1231535            3231535
            2011           93120931        123156154          123156154
Kingston    2010           32131233          1231563             145563
            2011           32132133         15648912           15648912
Newham      2010           31232131          1231561             231561
            2011           32132144         12135614           12135614
Hackney     2010           31234235          1531584             131584
            2011           43538765         48946351              43351
Tooting     2010           65634112          4894165            4894165
            2011           42363265         48949849           48949849
Hammersmith 2010           12312412            41565              41565
            2011           65745845          4894123            

In [85]:
# The MultiIndex must be sorted before slicing it by label, so sort the index
# first and then take every borough from "Greenwich" through "Hackney".
korea_mdf = korea_mdf.sort_index()
korea_mdf["Greenwich" : "Hackney"]


Unnamed: 0_level_0,Unnamed: 1_level_0,Total_Population,Male_Population,Female_Population
Borough,Years,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Greenwich,2010,10321231,1231535,3231535
Greenwich,2011,93120931,123156154,123156154
Hackney,2010,31234235,1531584,131584
Hackney,2011,43538765,48946351,43351


In [86]:
# Move index level 0 (Borough) into the columns; Years remains as the row index.
korea_mdf.unstack(level=0)

Unnamed: 0_level_0,Total_Population,Total_Population,Total_Population,Total_Population,Total_Population,Total_Population,Male_Population,Male_Population,Male_Population,Male_Population,Male_Population,Male_Population,Female_Population,Female_Population,Female_Population,Female_Population,Female_Population,Female_Population
Borough,Greenwich,Hackney,Hammersmith,Kingston,Newham,Tooting,Greenwich,Hackney,Hammersmith,Kingston,Newham,Tooting,Greenwich,Hackney,Hammersmith,Kingston,Newham,Tooting
Years,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2010,10321231,31234235,12312412,32131233,31232131,65634112,1231535,1531584,41565,1231563,1231561,4894165,3231535,131584,41565,145563,231561,4894165
2011,93120931,43538765,65745845,32132133,32132144,42363265,123156154,48946351,4894123,15648912,12135614,48949849,123156154,43351,4894123,15648912,12135614,48949849


In [88]:
# Move index level 1 (Years) into the columns; Borough remains as the row index.
korea_mdf.unstack(level=1)

Unnamed: 0_level_0,Total_Population,Total_Population,Male_Population,Male_Population,Female_Population,Female_Population
Years,2010,2011,2010,2011,2010,2011
Borough,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Greenwich,10321231,93120931,1231535,123156154,3231535,123156154
Hackney,31234235,43538765,1531584,48946351,131584,43351
Hammersmith,12312412,65745845,41565,4894123,41565,4894123
Kingston,32131233,32132133,1231563,15648912,145563,15648912
Newham,31232131,32132144,1231561,12135614,231561,12135614
Tooting,65634112,42363265,4894165,48949849,4894165,48949849


In [89]:
# Move the column labels into a new innermost index level, giving one long
# Series keyed by (Borough, Years, column name).
korea_mdf.stack()

Borough      Years                   
Greenwich    2010   Total_Population      10321231
                    Male_Population        1231535
                    Female_Population      3231535
             2011   Total_Population      93120931
                    Male_Population      123156154
                    Female_Population    123156154
Hackney      2010   Total_Population      31234235
                    Male_Population        1531584
                    Female_Population       131584
             2011   Total_Population      43538765
                    Male_Population       48946351
                    Female_Population        43351
Hammersmith  2010   Total_Population      12312412
                    Male_Population          41565
                    Female_Population        41565
             2011   Total_Population      65745845
                    Male_Population        4894123
                    Female_Population      4894123
Kingston     2010   Total_Population      32

In [90]:
# Show the frame again; its index was sorted in an earlier cell.
print(korea_mdf)

                   Total_Population  Male_Population  Female_Population
Borough     Years                                                      
Greenwich   2010           10321231          1231535            3231535
            2011           93120931        123156154          123156154
Hackney     2010           31234235          1531584             131584
            2011           43538765         48946351              43351
Hammersmith 2010           12312412            41565              41565
            2011           65745845          4894123            4894123
Kingston    2010           32131233          1231563             145563
            2011           32132133         15648912           15648912
Newham      2010           31232131          1231561             231561
            2011           32132144         12135614           12135614
Tooting     2010           65634112          4894165            4894165
            2011           42363265         48949849           4

In [94]:
# Turn index level 0 (Borough) into an ordinary column; Years stays as the index.
idx_flat = korea_mdf.reset_index(level=0)
print(idx_flat)

           Borough  Total_Population  Male_Population  Female_Population
Years                                                                   
2010     Greenwich          10321231          1231535            3231535
2011     Greenwich          93120931        123156154          123156154
2010       Hackney          31234235          1531584             131584
2011       Hackney          43538765         48946351              43351
2010   Hammersmith          12312412            41565              41565
2011   Hammersmith          65745845          4894123            4894123
2010      Kingston          32131233          1231563             145563
2011      Kingston          32132133         15648912           15648912
2010        Newham          31232131          1231561             231561
2011        Newham          32132144         12135614           12135614
2010       Tooting          65634112          4894165            4894165
2011       Tooting          42363265         489498

In [97]:
# Turn both index levels into ordinary columns; the rows get a default 0..n-1 index.
idx_flat = korea_mdf.reset_index(level=(0,1))
print(idx_flat)

        Borough  Years  Total_Population  Male_Population  Female_Population
0     Greenwich   2010          10321231          1231535            3231535
1     Greenwich   2011          93120931        123156154          123156154
2       Hackney   2010          31234235          1531584             131584
3       Hackney   2011          43538765         48946351              43351
4   Hammersmith   2010          12312412            41565              41565
5   Hammersmith   2011          65745845          4894123            4894123
6      Kingston   2010          32131233          1231563             145563
7      Kingston   2011          32132133         15648912           15648912
8        Newham   2010          31232131          1231561             231561
9        Newham   2011          32132144         12135614           12135614
10      Tooting   2010          65634112          4894165            4894165
11      Tooting   2011          42363265         48949849           48949849

In [98]:
# Rebuild the (Borough, Years) MultiIndex from the two columns. The result is
# only displayed, not assigned, so idx_flat itself is left unchanged.
idx_flat.set_index(["Borough", "Years"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_Population,Male_Population,Female_Population
Borough,Years,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Greenwich,2010,10321231,1231535,3231535
Greenwich,2011,93120931,123156154,123156154
Hackney,2010,31234235,1531584,131584
Hackney,2011,43538765,48946351,43351
Hammersmith,2010,12312412,41565,41565
Hammersmith,2011,65745845,4894123,4894123
Kingston,2010,32131233,1231563,145563
Kingston,2011,32132133,15648912,15648912
Newham,2010,31232131,1231561,231561
Newham,2011,32132144,12135614,12135614
