In [1]:
import pandas as pd

In [2]:
# Load the per-country land temperature records from the CSV file
csv_path1 = "GlobalLandTemperaturesByCountry.csv"
df = pd.read_csv(filepath_or_buffer=csv_path1)
df.head(5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
# Add a 'datetime' column holding the 'dt' strings parsed into real datetimes
# (the original 'dt' column is kept unchanged)
df = df.assign(datetime=pd.to_datetime(df["dt"]))
df.dtypes

dt                                       object
AverageTemperature                      float64
AverageTemperatureUncertainty           float64
Country                                  object
datetime                         datetime64[ns]
dtype: object

In [4]:
# Derive a 'Year' column: the yearly period each reading's datetime falls in
year_of_reading = df["datetime"].dt.to_period("Y")
df["Year"] = year_of_reading
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,datetime,Year
0,1743-11-01,4.384,2.294,Åland,1743-11-01,1743
1,1743-12-01,,,Åland,1743-12-01,1743
2,1744-01-01,,,Åland,1744-01-01,1744
3,1744-02-01,,,Åland,1744-02-01,1744
4,1744-03-01,,,Åland,1744-03-01,1744


In [5]:
# Keep only readings from 1990 onward so the range matches the carbon emissions dataset
from_1990_onward = df["Year"] >= "1990"
df = df.loc[from_1990_onward]
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,datetime,Year
2954,1990-01-01,0.515,0.359,Åland,1990-01-01,1990
2955,1990-02-01,3.35,0.629,Åland,1990-02-01,1990
2956,1990-03-01,3.134,0.231,Åland,1990-03-01,1990
2957,1990-04-01,4.846,0.255,Åland,1990-04-01,1990
2958,1990-05-01,8.708,0.378,Åland,1990-05-01,1990


In [6]:
# Get each country's average annual land temperature using groupby.
# Only the two numeric temperature columns are averaged: the frame still
# carries the raw 'dt' strings (object dtype), and on pandas >= 2.0 calling
# .mean() over an object column raises TypeError instead of silently
# dropping it. Selecting the columns explicitly gives the same result on
# every pandas version.
df = (
    df.groupby(["Country", "Year"])[
        ["AverageTemperature", "AverageTemperatureUncertainty"]
    ]
    .mean()          # NaN readings are skipped, so sparse years still get a mean
    .reset_index()   # turn the Country/Year group keys back into ordinary columns
)
df.head()

Unnamed: 0,Country,Year,AverageTemperature,AverageTemperatureUncertainty
0,Afghanistan,1990,14.993333,0.362417
1,Afghanistan,1991,14.37075,0.411
2,Afghanistan,1992,14.056083,0.501667
3,Afghanistan,1993,14.43925,0.53925
4,Afghanistan,1994,14.75475,0.396833


In [7]:
# Reshape to wide form: one row per country, one column per year,
# each cell holding that year's mean temperature
df = (
    df.pivot(index='Country', columns='Year', values='AverageTemperature')
    .reset_index()                 # make 'Country' a regular column again
    .rename_axis(None, axis=1)     # drop the leftover 'Year' label on the column axis
)
df.head()

Unnamed: 0,Country,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,Afghanistan,14.993333,14.37075,14.056083,14.43925,14.75475,14.859167,14.426,14.904,15.1325,...,15.770917,14.98,15.595583,15.106167,15.2165,15.25775,15.828667,15.518,14.481583,16.533625
1,Africa,24.70375,24.42225,24.335417,24.538667,24.43875,24.718417,24.6345,24.706833,25.0205,...,24.895917,25.156167,24.818917,24.8515,24.754667,25.0265,25.4725,24.7865,24.725917,25.20875
2,Albania,13.468667,12.338833,13.084667,13.122583,14.099667,12.782083,12.7365,12.900917,13.31125,...,13.258167,12.747083,12.976917,13.8905,13.955167,13.84425,13.775417,13.44325,13.76825,14.993875
3,Algeria,24.014417,23.011083,22.767,23.450833,23.642083,23.7225,23.734333,23.955167,23.920583,...,23.916,24.222583,24.200833,24.065333,23.95025,24.154333,25.215667,24.144167,23.954833,25.1215
4,American Samoa,27.15625,27.171167,27.072,26.816417,26.986333,27.268667,27.112667,26.95575,27.424833,...,27.281167,27.3735,27.08125,27.452417,26.995083,27.03425,27.453417,27.0095,27.201417,27.51725


In [9]:
# Write the cleaned table to disk as a JSON array with one object per row
df.to_json(path_or_buf='clean_land_temps.json', orient='records')