In [1]:
import pandas as pd
import numpy as np
path=""

##  <font style="color:rgb(34,169,34)">  File Reading </font> 

In [2]:
homelessness = pd.read_csv(path+"homelessness.csv")

##  <font style="color:rgb(34,169,34)">head() method is used to return top n (5 by default) rows of a data frame or series. </font> 

In [3]:
homelessness.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
0,East South Central,Alabama,2570,864,4887681
1,Pacific,Alaska,1434,582,735139
2,Mountain,Arizona,7259,2606,7158024
3,West South Central,Arkansas,2280,432,3009733
4,Pacific,California,109008,20964,39461588


In [4]:
# Print the shape of homelessness
print(homelessness.shape)

(51, 5)


In [5]:
# Print a description of homelessness
print(homelessness.describe())

         individuals  family_members     state_pop
count      51.000000       51.000000  5.100000e+01
mean     7225.784314     3504.882353  6.405637e+06
std     15991.025083     7805.411811  7.327258e+06
min       434.000000       75.000000  5.776010e+05
25%      1446.500000      592.000000  1.777414e+06
50%      3082.000000     1482.000000  4.461153e+06
75%      6781.500000     3196.000000  7.340946e+06
max    109008.000000    52070.000000  3.946159e+07


In [6]:
# Print the column index of homelessness
print(homelessness.columns)

Index(['region', 'state', 'individuals', 'family_members', 'state_pop'], dtype='object')


In [7]:
# Print the row index of homelessness
print(homelessness.index)

RangeIndex(start=0, stop=51, step=1)


# Sorting and subsetting

In [8]:
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html

In [9]:
# Sort homelessness by individual
homelessness_ind = homelessness.sort_values("individuals")

# Print the top few rows
homelessness_ind.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
50,Mountain,Wyoming,434,205,577601
34,West North Central,North Dakota,467,75,758080
7,South Atlantic,Delaware,708,374,965479
39,New England,Rhode Island,747,354,1058287
45,New England,Vermont,780,511,624358


In [10]:
# Sort homelessness by region, then descending family members
homelessness_reg_fam = homelessness.sort_values(["region","family_members"],ascending=[True,False])

# Print the top few rows
homelessness_reg_fam.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
13,East North Central,Illinois,6752,3891,12723071
35,East North Central,Ohio,6929,3320,11676341
22,East North Central,Michigan,5209,3142,9984072
49,East North Central,Wisconsin,2740,2167,5807406
14,East North Central,Indiana,3776,1482,6695497


Subsetting columns

In [11]:
# Select the individuals column
individuals = homelessness["individuals"]

# Print the head of the result
individuals.head()

0      2570
1      1434
2      7259
3      2280
4    109008
Name: individuals, dtype: int64

In [12]:
# Select the state and family_members columns
state_fam = homelessness[["state","family_members"]]

# Print the head of the result
state_fam.head()

Unnamed: 0,state,family_members
0,Alabama,864
1,Alaska,582
2,Arizona,2606
3,Arkansas,432
4,California,20964


In [13]:
# Filter for rows where individuals is greater than 10000
ind_gt_10k = homelessness[homelessness["individuals"]>10000]

# See the result
ind_gt_10k.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
4,Pacific,California,109008,20964,39461588
9,South Atlantic,Florida,21443,9587,21244317
32,Mid-Atlantic,New York,39827,52070,19530351
37,Pacific,Oregon,11139,3337,4181886
43,West South Central,Texas,19199,6111,28628666


In [14]:
# Filter for rows where region is Mountain
mountain_reg = homelessness[homelessness["region"]=="Mountain"]

# See the result
mountain_reg.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
2,Mountain,Arizona,7259,2606,7158024
5,Mountain,Colorado,7607,3250,5691287
12,Mountain,Idaho,1297,715,1750536
26,Mountain,Montana,983,422,1060665
28,Mountain,Nevada,7058,486,3027341


In [15]:
# Filter for rows where family_members is less than 1000 
# and region is Pacific
fam_lt_1k_pac = homelessness[(homelessness["family_members"]<1000) & (homelessness["region"]=="Pacific")]

# See the result
fam_lt_1k_pac.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
1,Pacific,Alaska,1434,582,735139


In [16]:
# Subset for rows in South Atlantic or Mid-Atlantic regions
south_mid_atlantic =  homelessness[(homelessness["region"]=="South Atlantic") | (homelessness["region"]=="Mid-Atlantic")]

# See the result
south_mid_atlantic.head()

Unnamed: 0,region,state,individuals,family_members,state_pop
7,South Atlantic,Delaware,708,374,965479
8,South Atlantic,District of Columbia,3770,3134,701547
9,South Atlantic,Florida,21443,9587,21244317
10,South Atlantic,Georgia,6943,2556,10511131
20,South Atlantic,Maryland,4914,2230,6035802


# Adding new columns

In [17]:
# Add total col as sum of individuals and family_members
homelessness["total"] = homelessness["individuals"] + homelessness["family_members"]

# Add p_individuals col as proportion of individuals
homelessness["p_individuals"] = homelessness["individuals"] / homelessness["total"]

# See the result
homelessness.head()

Unnamed: 0,region,state,individuals,family_members,state_pop,total,p_individuals
0,East South Central,Alabama,2570,864,4887681,3434,0.748398
1,Pacific,Alaska,1434,582,735139,2016,0.71131
2,Mountain,Arizona,7259,2606,7158024,9865,0.735834
3,West South Central,Arkansas,2280,432,3009733,2712,0.840708
4,Pacific,California,109008,20964,39461588,129972,0.838704


In [18]:
# Create indiv_per_10k col as homeless individuals per 10k state pop
homelessness["indiv_per_10k"] = 10000 * homelessness["individuals"] / homelessness["state_pop"] 

# Subset rows for indiv_per_10k greater than 20
high_homelessness = homelessness[homelessness["indiv_per_10k"] > 20]

# Sort high_homelessness by descending indiv_per_10k
high_homelessness_srt = high_homelessness.sort_values("indiv_per_10k", ascending=False)

# From high_homelessness_srt, select the state and indiv_per_10k cols
result = high_homelessness_srt[["state", "indiv_per_10k"]]

# See the result
result.head()

Unnamed: 0,state,indiv_per_10k
8,District of Columbia,53.738381
11,Hawaii,29.079406
4,California,27.623825
37,Oregon,26.636307
28,Nevada,23.314189


# Summary statistics

In [19]:
# this data set contains weekly sales in US dollars in various stores
# each store has id number and specific store type
# unemp is national employement rate that week
sales = pd.read_csv(path+"sales_subset.csv")
sales.head()

Unnamed: 0,store,type,department,date,weekly_sales,is_holiday,temperature_c,fuel_price_usd_per_l,unemployment
0,1,A,1,2/5/2010,24924.5,False,5.727778,0.679451,8.106
1,1,A,1,3/5/2010,21827.9,False,8.055556,0.693452,8.106
2,1,A,1,4/2/2010,57258.43,False,16.816667,0.718284,7.808
3,1,A,1,5/7/2010,17413.94,False,22.527778,0.748928,7.808
4,1,A,1,6/4/2010,17558.09,False,27.05,0.714586,7.808


In [20]:
# Print the mean of weekly_sales
print(sales["weekly_sales"].mean())

23843.950148505668


In [21]:
# Print the median of weekly_sales
print(sales["weekly_sales"].median())

12049.064999999999


In [22]:
# Print the maximum of the date column
print(sales['date'].max())

# Print the minimum of the date column
print(sales['date'].min())

9/9/2011
1/13/2012


In [23]:
# A custom IQR function
def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)
    
# Print IQR of the temperature_c column
print(sales["temperature_c"].agg(iqr))

16.583333337000003


In [24]:
# A custom IQR function
def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)

# Update to print IQR of temperature_c, fuel_price_usd_per_l, & unemployment
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg(iqr))

temperature_c           16.583333
fuel_price_usd_per_l     0.073176
unemployment             0.565000
dtype: float64


In [25]:
# Update to print IQR and median of temperature_c, fuel_price_usd_per_l, & unemployment
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg([iqr, np.median]))

        temperature_c  fuel_price_usd_per_l  unemployment
iqr         16.583333              0.073176         0.565
median      16.966667              0.743381         8.099


# Dropping duplicates

In [26]:
# Drop duplicate store/type combinations
sales['date']=pd.to_datetime(sales['date'])
store_types = sales.drop_duplicates(subset=["store", "type"])
store_types.head()

Unnamed: 0,store,type,department,date,weekly_sales,is_holiday,temperature_c,fuel_price_usd_per_l,unemployment
0,1,A,1,2010-02-05,24924.5,False,5.727778,0.679451,8.106
901,2,A,1,2010-02-05,35034.06,False,4.55,0.679451,8.324
1798,4,A,1,2010-02-05,38724.42,False,6.533333,0.686319,8.623
2699,6,A,1,2010-02-05,25619.0,False,4.683333,0.679451,7.259
3593,10,B,1,2010-02-05,40212.84,False,12.411111,0.782478,9.765


# Groupping

In [27]:
# Group by type; calc total weekly sales
sales_by_type = sales.groupby("type")["weekly_sales"].sum()

sales_by_type.head()

type
A    2.337163e+08
B    2.317840e+07
Name: weekly_sales, dtype: float64

In [28]:
# Get proportion for each type
sales_propn_by_type = sales_by_type / sum(sales_by_type)
print(sales_propn_by_type)

type
A    0.909775
B    0.090225
Name: weekly_sales, dtype: float64


In [29]:
sales_by_type_is_holiday = sales.groupby(["type", "is_holiday"])["weekly_sales"].sum()
sales_by_type_is_holiday.head()

type  is_holiday
A     False         2.336927e+08
      True          2.360181e+04
B     False         2.317678e+07
      True          1.621410e+03
Name: weekly_sales, dtype: float64

In [30]:
sales.head()

Unnamed: 0,store,type,department,date,weekly_sales,is_holiday,temperature_c,fuel_price_usd_per_l,unemployment
0,1,A,1,2010-02-05,24924.5,False,5.727778,0.679451,8.106
1,1,A,1,2010-03-05,21827.9,False,8.055556,0.693452,8.106
2,1,A,1,2010-04-02,57258.43,False,16.816667,0.718284,7.808
3,1,A,1,2010-05-07,17413.94,False,22.527778,0.748928,7.808
4,1,A,1,2010-06-04,17558.09,False,27.05,0.714586,7.808


In [31]:
# Import numpy with the alias np
import numpy as np

# For each store type, aggregate weekly_sales: get min, max, mean, and median
sales_stats = sales.groupby("type")["weekly_sales"].agg([np.min, np.max, np.mean, np.median])

# Print sales_stats
sales_stats.head()

Unnamed: 0_level_0,amin,amax,mean,median
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,-1098.0,293966.05,23674.667242,11943.92
B,-798.0,232558.51,25696.67837,13336.08


# Indexes

In [32]:
temperatures = pd.read_csv(path+"temperatures.csv")
temperatures.head()

Unnamed: 0,date,city,country,avg_temp_c
0,1/1/2000,Abidjan,Côte D'Ivoire,27.293
1,2/1/2000,Abidjan,Côte D'Ivoire,27.685
2,3/1/2000,Abidjan,Côte D'Ivoire,29.061
3,4/1/2000,Abidjan,Côte D'Ivoire,28.162
4,5/1/2000,Abidjan,Côte D'Ivoire,27.547


In [33]:
temperatures_ind = temperatures.set_index("city")
temperatures_ind.head()

Unnamed: 0_level_0,date,country,avg_temp_c
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Abidjan,1/1/2000,Côte D'Ivoire,27.293
Abidjan,2/1/2000,Côte D'Ivoire,27.685
Abidjan,3/1/2000,Côte D'Ivoire,29.061
Abidjan,4/1/2000,Côte D'Ivoire,28.162
Abidjan,5/1/2000,Côte D'Ivoire,27.547


In [34]:
# Index temperatures by country & city
temperatures_ind = temperatures.set_index(["country","city"])

# List of tuples: Brazil, Rio De Janeiro & Pakistan, Lahore
rows_to_keep = [("Brazil","Rio De Janeiro"),("Pakistan","Lahore")]

# Subset for rows to keep
temperatures_ind.loc[rows_to_keep]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Brazil,Rio De Janeiro,1/1/2000,25.974
Brazil,Rio De Janeiro,2/1/2000,26.699
Brazil,Rio De Janeiro,3/1/2000,26.270
Brazil,Rio De Janeiro,4/1/2000,25.750
Brazil,Rio De Janeiro,5/1/2000,24.356
...,...,...,...
Pakistan,Lahore,5/1/2013,33.457
Pakistan,Lahore,6/1/2013,34.456
Pakistan,Lahore,7/1/2013,33.279
Pakistan,Lahore,8/1/2013,31.511


# Slicing

In [35]:
# Sort the index of temperatures_ind
temperatures_srt = temperatures_ind.sort_index()
temperatures_srt.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,Kabul,1/1/2000,3.326
Afghanistan,Kabul,2/1/2000,3.454
Afghanistan,Kabul,3/1/2000,9.612
Afghanistan,Kabul,4/1/2000,17.925
Afghanistan,Kabul,5/1/2000,24.658


In [36]:
# Subset rows from Pakistan to Russia
temperatures_srt.loc["Pakistan":"Russia"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Pakistan,Faisalabad,1/1/2000,12.792
Pakistan,Faisalabad,2/1/2000,14.339
Pakistan,Faisalabad,3/1/2000,20.309
Pakistan,Faisalabad,4/1/2000,29.072
Pakistan,Faisalabad,5/1/2000,34.845
...,...,...,...
Russia,Saint Petersburg,5/1/2013,12.355
Russia,Saint Petersburg,6/1/2013,17.185
Russia,Saint Petersburg,7/1/2013,17.234
Russia,Saint Petersburg,8/1/2013,17.153


In [37]:
# Try to subset rows from Lahore to Moscow
temperatures_srt.loc["Lahore":"Moscow"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Mexico,Mexico,1/1/2000,12.694
Mexico,Mexico,2/1/2000,14.677
Mexico,Mexico,3/1/2000,17.376
Mexico,Mexico,4/1/2000,18.294
Mexico,Mexico,5/1/2000,18.562
...,...,...,...
Morocco,Casablanca,5/1/2013,19.217
Morocco,Casablanca,6/1/2013,23.649
Morocco,Casablanca,7/1/2013,27.488
Morocco,Casablanca,8/1/2013,27.952


In [38]:
# Subset rows from Pakistan, Lahore to Russia, Moscow
temperatures_srt.loc[("Pakistan", "Lahore"):("Russia", "Moscow")]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Pakistan,Lahore,1/1/2000,12.792
Pakistan,Lahore,2/1/2000,14.339
Pakistan,Lahore,3/1/2000,20.309
Pakistan,Lahore,4/1/2000,29.072
Pakistan,Lahore,5/1/2000,34.845
...,...,...,...
Russia,Moscow,5/1/2013,16.152
Russia,Moscow,6/1/2013,18.718
Russia,Moscow,7/1/2013,18.136
Russia,Moscow,8/1/2013,17.485


In [39]:
temperatures_srt.loc[("India","Hyderabad"):("Iraq","Baghdad"),"date":"avg_temp_c"]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,avg_temp_c
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1
India,Hyderabad,1/1/2000,23.779
India,Hyderabad,2/1/2000,25.826
India,Hyderabad,3/1/2000,28.821
India,Hyderabad,4/1/2000,32.698
India,Hyderabad,5/1/2000,32.438
...,...,...,...
Iraq,Baghdad,5/1/2013,28.673
Iraq,Baghdad,6/1/2013,33.803
Iraq,Baghdad,7/1/2013,36.392
Iraq,Baghdad,8/1/2013,35.463


In [40]:
temperatures['date']= pd.to_datetime(temperatures['date'])
temperatures.head()

Unnamed: 0,date,city,country,avg_temp_c
0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


In [41]:
# Use Boolean conditions to subset temperatures for rows in 2010 and 2011
temperatures_bool = temperatures[(temperatures["date"] >= "2010-01-01") & (temperatures["date"] <= "2011-12-31")]
temperatures_bool.head()

Unnamed: 0,date,city,country,avg_temp_c
120,2010-01-01,Abidjan,Côte D'Ivoire,28.27
121,2010-02-01,Abidjan,Côte D'Ivoire,29.262
122,2010-03-01,Abidjan,Côte D'Ivoire,29.596
123,2010-04-01,Abidjan,Côte D'Ivoire,29.068
124,2010-05-01,Abidjan,Côte D'Ivoire,28.258


In [42]:
# Set date as the index and sort the index
temperatures_ind = temperatures.set_index("date").sort_index()

# Use .loc[] to subset temperatures_ind for rows in 2010 and 2011
temperatures_ind.loc["2010":"2011"]

Unnamed: 0_level_0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,Faisalabad,Pakistan,11.810
2010-01-01,Melbourne,Australia,20.016
2010-01-01,Chongqing,China,7.921
2010-01-01,São Paulo,Brazil,23.738
2010-01-01,Guangzhou,China,14.136
...,...,...,...
2011-12-01,Nagoya,Japan,6.476
2011-12-01,Hyderabad,India,23.613
2011-12-01,Cali,Colombia,21.559
2011-12-01,Lima,Peru,18.293


In [43]:
# Use .loc[] to subset temperatures_ind for rows from Aug 2010 to Feb 2011
temperatures_ind.loc["2010-08":"2011-02"]

Unnamed: 0_level_0,city,country,avg_temp_c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-08-01,Calcutta,India,30.226
2010-08-01,Pune,India,24.941
2010-08-01,Izmir,Turkey,28.352
2010-08-01,Tianjin,China,25.543
2010-08-01,Manila,Philippines,27.101
...,...,...,...
2011-02-01,Kabul,Afghanistan,3.914
2011-02-01,Chicago,United States,0.276
2011-02-01,Aleppo,Syria,8.246
2011-02-01,Delhi,India,18.136


In [44]:
# Get 23rd row, 2nd column (index 22, 1)
print(temperatures.iloc[22, 1])

Abidjan


In [45]:
# Use slicing to get the first 5 rows
temperatures.iloc[:5]

Unnamed: 0,date,city,country,avg_temp_c
0,2000-01-01,Abidjan,Côte D'Ivoire,27.293
1,2000-02-01,Abidjan,Côte D'Ivoire,27.685
2,2000-03-01,Abidjan,Côte D'Ivoire,29.061
3,2000-04-01,Abidjan,Côte D'Ivoire,28.162
4,2000-05-01,Abidjan,Côte D'Ivoire,27.547


In [46]:
# Use slicing to get columns 3 to 4
temperatures.iloc[:, 2:4]

Unnamed: 0,country,avg_temp_c
0,Côte D'Ivoire,27.293
1,Côte D'Ivoire,27.685
2,Côte D'Ivoire,29.061
3,Côte D'Ivoire,28.162
4,Côte D'Ivoire,27.547
...,...,...
16495,China,18.979
16496,China,23.522
16497,China,25.251
16498,China,24.528


In [47]:
# Use slicing in both directions at once
temperatures.iloc[:5, 2:4]

Unnamed: 0,country,avg_temp_c
0,Côte D'Ivoire,27.293
1,Côte D'Ivoire,27.685
2,Côte D'Ivoire,29.061
3,Côte D'Ivoire,28.162
4,Côte D'Ivoire,27.547


In [48]:
# Add a year column to temperatures
temperatures["year"] = temperatures["date"].dt.year

# Pivot avg_temp_c by country and city vs year
temp_by_country_city_vs_year = temperatures.pivot_table("avg_temp_c", index = ["country", "city"], columns = "year")

# See the result
temp_by_country_city_vs_year.head()

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Afghanistan,Kabul,15.822667,15.847917,15.714583,15.132583,16.128417,14.8475,15.7985,15.518,15.47925,15.093333,15.676,15.812167,14.510333,16.206125
Angola,Luanda,24.410333,24.427083,24.790917,24.867167,24.216167,24.414583,24.138417,24.241583,24.266333,24.325083,24.44025,24.15075,24.240083,24.553875
Australia,Melbourne,14.320083,14.18,14.075833,13.985583,13.742083,14.3785,13.991083,14.991833,14.110583,14.647417,14.231667,14.190917,14.268667,14.7415
Australia,Sydney,17.567417,17.8545,17.733833,17.592333,17.869667,18.028083,17.7495,18.020833,17.321083,18.175833,17.999,17.713333,17.474333,18.08975
Bangladesh,Dhaka,25.90525,25.93125,26.095,25.927417,26.136083,26.193333,26.440417,25.951333,26.0045,26.535583,26.648167,25.80325,26.283583,26.587


In [49]:
# Subset for Egypt to India
temp_by_country_city_vs_year.loc["Egypt":"India"]

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Egypt,Alexandria,20.7445,21.454583,21.456167,21.221417,21.064167,21.082333,21.148167,21.50775,21.739,21.6705,22.459583,21.1815,21.552583,21.4385
Egypt,Cairo,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Egypt,Gizeh,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Ethiopia,Addis Abeba,18.24125,18.296417,18.46975,18.320917,18.29275,18.312833,18.427083,18.142583,18.165,18.765333,18.29825,18.60675,18.448583,19.539
France,Paris,11.739667,11.37125,11.871333,11.9095,11.338833,11.552917,11.7885,11.750833,11.27825,11.464083,10.409833,12.32575,11.219917,11.011625
Germany,Berlin,10.963667,9.69025,10.264417,10.06575,9.822583,9.919083,10.545333,10.883167,10.65775,10.0625,8.606833,10.556417,9.964333,10.1215
India,Ahmadabad,27.436,27.198083,27.719083,27.403833,27.628333,26.828083,27.282833,27.511167,27.0485,28.095833,28.017833,27.290417,27.02725,27.608625
India,Bangalore,25.337917,25.528167,25.755333,25.92475,25.252083,25.4765,25.41825,25.464333,25.352583,25.72575,25.70525,25.362083,26.042333,26.6105
India,Bombay,27.203667,27.243667,27.628667,27.578417,27.31875,27.03575,27.3815,27.634667,27.17775,27.8445,27.765417,27.384917,27.1925,26.713
India,Calcutta,26.491333,26.515167,26.703917,26.561333,26.634333,26.729167,26.98625,26.584583,26.522333,27.15325,27.288833,26.406917,26.935083,27.36925


In [50]:
# Subset for Egypt, Cairo to India, Delhi
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi")]

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Egypt,Cairo,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Egypt,Gizeh,21.486167,22.330833,22.414083,22.1705,22.081917,22.0065,22.05,22.361,22.6445,22.625,23.71825,21.986917,22.48425,22.907
Ethiopia,Addis Abeba,18.24125,18.296417,18.46975,18.320917,18.29275,18.312833,18.427083,18.142583,18.165,18.765333,18.29825,18.60675,18.448583,19.539
France,Paris,11.739667,11.37125,11.871333,11.9095,11.338833,11.552917,11.7885,11.750833,11.27825,11.464083,10.409833,12.32575,11.219917,11.011625
Germany,Berlin,10.963667,9.69025,10.264417,10.06575,9.822583,9.919083,10.545333,10.883167,10.65775,10.0625,8.606833,10.556417,9.964333,10.1215
India,Ahmadabad,27.436,27.198083,27.719083,27.403833,27.628333,26.828083,27.282833,27.511167,27.0485,28.095833,28.017833,27.290417,27.02725,27.608625
India,Bangalore,25.337917,25.528167,25.755333,25.92475,25.252083,25.4765,25.41825,25.464333,25.352583,25.72575,25.70525,25.362083,26.042333,26.6105
India,Bombay,27.203667,27.243667,27.628667,27.578417,27.31875,27.03575,27.3815,27.634667,27.17775,27.8445,27.765417,27.384917,27.1925,26.713
India,Calcutta,26.491333,26.515167,26.703917,26.561333,26.634333,26.729167,26.98625,26.584583,26.522333,27.15325,27.288833,26.406917,26.935083,27.36925
India,Delhi,26.048333,25.862917,26.634333,25.721083,26.239917,25.716083,26.365917,26.145667,25.675,26.55425,26.52025,25.6295,25.889417,26.70925


In [51]:
# Subset in both directions at once
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi"), "2005":"2010"]

Unnamed: 0_level_0,year,2005,2006,2007,2008,2009,2010
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Egypt,Cairo,22.0065,22.05,22.361,22.6445,22.625,23.71825
Egypt,Gizeh,22.0065,22.05,22.361,22.6445,22.625,23.71825
Ethiopia,Addis Abeba,18.312833,18.427083,18.142583,18.165,18.765333,18.29825
France,Paris,11.552917,11.7885,11.750833,11.27825,11.464083,10.409833
Germany,Berlin,9.919083,10.545333,10.883167,10.65775,10.0625,8.606833
India,Ahmadabad,26.828083,27.282833,27.511167,27.0485,28.095833,28.017833
India,Bangalore,25.4765,25.41825,25.464333,25.352583,25.72575,25.70525
India,Bombay,27.03575,27.3815,27.634667,27.17775,27.8445,27.765417
India,Calcutta,26.729167,26.98625,26.584583,26.522333,27.15325,27.288833
India,Delhi,25.716083,26.365917,26.145667,25.675,26.55425,26.52025


In [52]:
temp_by_country_city_vs_year.head()

Unnamed: 0_level_0,year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
country,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Afghanistan,Kabul,15.822667,15.847917,15.714583,15.132583,16.128417,14.8475,15.7985,15.518,15.47925,15.093333,15.676,15.812167,14.510333,16.206125
Angola,Luanda,24.410333,24.427083,24.790917,24.867167,24.216167,24.414583,24.138417,24.241583,24.266333,24.325083,24.44025,24.15075,24.240083,24.553875
Australia,Melbourne,14.320083,14.18,14.075833,13.985583,13.742083,14.3785,13.991083,14.991833,14.110583,14.647417,14.231667,14.190917,14.268667,14.7415
Australia,Sydney,17.567417,17.8545,17.733833,17.592333,17.869667,18.028083,17.7495,18.020833,17.321083,18.175833,17.999,17.713333,17.474333,18.08975
Bangladesh,Dhaka,25.90525,25.93125,26.095,25.927417,26.136083,26.193333,26.440417,25.951333,26.0045,26.535583,26.648167,25.80325,26.283583,26.587


In [53]:
# Get the worldwide mean temp by year
mean_temp_by_year = temp_by_country_city_vs_year.mean()
mean_temp_by_year.head()

year
2000    19.506243
2001    19.679352
2002    19.855685
2003    19.630197
2004    19.672204
dtype: float64

In [54]:
# Filter for the year that had the highest mean temp
mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()]

year
2013    20.312285
dtype: float64

In [55]:
# Get the mean temp by city
mean_temp_by_city = temp_by_country_city_vs_year.mean(axis="columns")
mean_temp_by_city.head()

country      city     
Afghanistan  Kabul        15.541955
Angola       Luanda       24.391616
Australia    Melbourne    14.275411
             Sydney       17.799250
Bangladesh   Dhaka        26.174440
dtype: float64

In [56]:
# Filter for the city that had the lowest mean temp
mean_temp_by_city[mean_temp_by_city == mean_temp_by_city.min()]

country  city  
China    Harbin    4.876551
dtype: float64