In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
# Show the current working directory; the CSV files below are read with relative paths.
os.getcwd()

# Importing the data

In [None]:
# Load the city-level temperature CSV, using its 'dt' column as the row index.
dataset1 = pd.read_csv('GlobalLandTemperaturesByCity.csv',index_col=['dt'])

In [None]:
# Wrap the loaded data in a DataFrame object named dataFrame (read_csv already returns one).
dataFrame=pd.DataFrame(dataset1)
# Display (number of rows, number of columns).
dataFrame.shape

In [None]:
# Keep only the rows for Dublin and turn the 'dt' index back into an ordinary column.
DublinData = dataFrame[dataFrame["City"].eq("Dublin")].reset_index()
print(DublinData.head())

In [None]:
# Series used for the Dublin plot: dates on x, average temperature on y.
x, y = DublinData["dt"], DublinData["AverageTemperature"]

In [None]:
# 'dt' holds date strings at this point; parse them so matplotlib draws a real
# time axis instead of treating every distinct string as its own category.
fig,ax = plt.subplots()
ax.plot(pd.to_datetime(x),y)
ax.set_xlabel('Date')
ax.set_ylabel('AverageTemperature')
ax.set_title('Dublin average temperature over time')
plt.show()

# Using loc and iloc on dataFrame

In [None]:
# Use 'dt' as the index so rows can be selected by date label with .loc.
# NOTE: not re-runnable as-is; a second run fails because 'dt' is no longer a column.
DublinData=DublinData.set_index('dt')

In [None]:
# Label-based selection: the row(s) whose index label is "1743-11-01", all columns.
DublinData.loc["1743-11-01",:]

In [None]:
# Position-based selection: the row at integer position 100 (the 101st row), all columns.
DublinData.iloc[100,:]

In [None]:
# Rows where the average temperature is above 17.
DublinData.loc[DublinData["AverageTemperature"].gt(17)]

In [None]:
# Distinct city names recorded for Ireland.
dataFrame.loc[dataFrame["Country"].eq("Ireland"), 'City'].unique()

In [None]:
# Highest recorded average temperature for each Irish city.
(
    dataFrame.loc[dataFrame["Country"].eq("Ireland")]
    .groupby('City')["AverageTemperature"]
    .max()
)

In [None]:
# Rows of DublinData whose AverageTemperature is missing.
DublinData[DublinData['AverageTemperature'].isna()]

# Forward Fill data

In [None]:
# Forward-fill: each missing value takes the last valid value above it.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
DublinDataFF = DublinData.ffill()

# Drop Missing Values

In [None]:
# Display DublinData without rows that contain any missing value.
# The result is not assigned, so DublinData itself is unchanged.
DublinData.dropna()

Function that replaces each missing value with the mean of that column for the same calendar month

In [None]:
def cleaning_data(df):
    """Fill missing values in place with the column mean for the same calendar month.

    The 'dt' column is first converted to datetime. Then, for every column that
    contains at least one missing value, each missing cell is replaced by the
    mean of that column over the rows that share the cell's month and have no
    missing value in any column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'dt' column parseable by ``pd.to_datetime``. It is
        modified in place; pass a copy if the original must be preserved.

    Returns
    -------
    None
    """
    # Cast the date field to datetime so the .dt.month accessor is available.
    df['dt'] = pd.to_datetime(df['dt'])
    months = df['dt'].dt.month

    # Columns that need filling, decided once up front.
    cols_with_nulls = [col for col in df.columns if df[col].isna().any()]

    for col in cols_with_nulls:
        null_index = df.index[df[col].isna()]
        # Cache of month -> mean for this column. A filled cell equals the mean
        # it was filled with, so the mean for a month does not change while this
        # column is processed and need not be recomputed for every missing row.
        monthly_mean = {}
        for i in null_index:
            month = months.loc[i]
            if month not in monthly_mean:
                complete_rows = df[months == month].dropna()
                monthly_mean[month] = complete_rows[col].mean()
            df.at[i, col] = monthly_mean[month]
   

In [None]:
# Display the city-level frame as loaded from the CSV.
dataset1

# Analysing other elements of the dataset

In [None]:
# Load the country-level temperatures indexed by 'dt', then expose 'dt' as a
# regular column again in dataFrame2.
dataset2 = pd.read_csv('GlobalLandTemperaturesByCountry.csv', index_col=['dt'])
dataFrame2 = pd.DataFrame(dataset2).reset_index()

In [None]:
# Rows for Greenland only. Take an explicit copy: this frame is later modified
# in place, and writing into a filtered slice of dataFrame2 is ambiguous
# (it is what triggers pandas' chained-assignment warning).
greenLandTemp = dataFrame2[dataFrame2['Country']=='Greenland'].copy()

In [None]:
# Fill the missing values of greenLandTemp in place (the function returns nothing).
cleaning_data(greenLandTemp)

In [None]:
# True if at least one row is an exact duplicate of an earlier row.
greenLandTemp.duplicated().any()

In [None]:
# Plot December temperatures only. Both x and y must come from the same
# filtered rows; passing the whole filtered frame as x against the unfiltered
# temperature column gives mismatched shapes.
december_rows = greenLandTemp[greenLandTemp['dt'].dt.month==12]
plt.plot(december_rows['dt'],december_rows['AverageTemperature'])

In [None]:
# December rows from years after 1970, plotted as temperature against date.
DecData = greenLandTemp[
    (greenLandTemp['dt'].dt.month == 12) & (greenLandTemp['dt'].dt.year > 1970)
]
plt.plot(DecData['dt'],DecData['AverageTemperature'])

Import sea-ice data from the National Snow and Ice Data Center (NSIDC) website

In [None]:
# Location of the regional daily sea-ice workbook on the nsidc.org server.
url = 'https://masie_web.apps.nsidc.org/pub//DATASETS/NOAA/G02135/seaice_analysis/N_Sea_Ice_Index_Regional_Daily_Data_G02135_v3.0.xlsx'
# sheet_name=None loads every sheet into a dict keyed by sheet name.
file = pd.read_excel(url,sheet_name=None)

In [None]:
# sheet_name=1 selects the second sheet of the workbook (positions are zero-based).
# Note that this downloads the workbook from `url` again.
file2 = pd.read_excel(url,sheet_name=1)

In [None]:
# Sea-ice frame used in the rest of the notebook (read_excel already returns a DataFrame).
dataframeSI = pd.DataFrame(file2)

In [None]:
# Propagate each month value down over the missing entries that follow it.
# Series.ffill() replaces the deprecated fillna(method='ffill') spelling.
dataframeSI['month']=dataframeSI['month'].ffill()

In [None]:
# Month name -> two-digit month number (kept as a string).
look_up = {
    'January': '01',
    'February': '02',
    'March': '03',
    'April': '04',
    'May': '05',
    'June': '06',
    'July': '07',
    'August': '08',
    'September': '09',
    'October': '10',
    'November': '11',
    'December': '12',
}

# Replace every month name by its number; a value missing from look_up raises KeyError.
dataframeSI['month'] = dataframeSI['month'].apply(look_up.__getitem__)

In [None]:
# Preview the first rows after the month conversion.
dataframeSI.head()

In [None]:
# Reshape from wide to long. Every column after the first two is turned into
# rows of (month, day, Extent, Year), where Year is the original column name
# as a string.
col_list = dataframeSI.keys()[2:]
yearly_frames = []
for col in col_list:
    df = dataframeSI[['month','day',col]].rename(columns = {col:"Extent"})
    df['Year'] = str(col)
    yearly_frames.append(df)

# Concatenate once at the end. The original accumulated inside the loop and
# referenced an undefined name (df_new), which raised NameError on the second
# column.
df_corrected = pd.concat(yearly_frames,ignore_index=True,sort=True)
  

In [None]:
# Drop rows that contain any missing value.
df_corrected=df_corrected.dropna()

In [None]:
# Assemble a datetime 'Date' column from the separate Year / month / day columns.
df_corrected['Date']=pd.to_datetime(df_corrected[['Year', 'month', 'day']])

In [None]:
# Rows whose Date falls in July (month number 7).
Julydata = df_corrected[df_corrected['Date'].dt.month==7]

In [None]:
# Mean July extent per year; 'Year' stays a regular column.
JulyMeans = Julydata.groupby('Year', as_index=False)['Extent'].mean()

In [None]:
# Attach to each yearly mean the Date of that year's day-1 July row.
JulyMeans = JulyMeans.merge(
    Julydata.loc[Julydata['day'] == 1, ['Year', 'Date']],
    how='right',
    on='Year',
)

In [None]:
# Preview the first three yearly July means.
JulyMeans.head(3)

# task: creating a reusable function to easily change month of the year we are plotting

In [None]:
def plot_Temp_Data(month, start_year=1979, end_year=2013):
    """Plot yearly mean sea-ice extent against average temperature for one month.

    Reads two notebook-level frames: ``df_corrected`` (columns 'Date', 'Year',
    'day', 'Extent') and ``greenLandTemp`` (columns 'dt', 'AverageTemperature',
    with 'dt' already converted to datetime). Neither frame is modified.

    Both series are restricted to the window running from day 1 of ``month`` in
    ``start_year`` to day 1 of ``month`` in ``end_year``. The window follows the
    requested month (it used to be fixed to July), and the temperature series is
    windowed here rather than relying on the global frame being trimmed first.

    Parameters
    ----------
    month : int
        Calendar month number, 1 (January) to 12 (December).
    start_year, end_year : int, optional
        First and last year shown (inclusive). Defaults: 1979 and 2013.

    Raises
    ------
    ValueError
        If ``month`` is not a month number between 1 and 12.
    """
    month_val = {1:'January', 2:'February', 3:'March', 4:'April', 5:'May',
               6:'June',7:'July', 8:'August', 9:'September', 10:'October',
               11:'November', 12:'December'}
    if month not in month_val:
        raise ValueError('month must be an integer between 1 and 12, got %r' % (month,))
    month_name = month_val[month]

    window_start = pd.Timestamp(year=int(start_year), month=int(month), day=1)
    window_end = pd.Timestamp(year=int(end_year), month=int(month), day=1)

    # Yearly mean extent for the month, dated at day 1 of that month.
    monthly_data = df_corrected[df_corrected['Date'].dt.month == month]
    monthly_mean = monthly_data.groupby('Year',as_index=False)['Extent'].agg('mean')
    first_of_month = monthly_data.loc[monthly_data['day']==1, ['Year','Date']]
    monthly_mean = monthly_mean.merge(first_of_month,how='right',on='Year')
    monthly_mean = monthly_mean[monthly_mean['Date'].between(window_start, window_end)]
    x_data_mean = monthly_mean['Date']
    y_data_mean = monthly_mean['Extent']

    # Temperature readings for the same month and the same window.
    temp_dates = greenLandTemp['dt']
    in_scope = (temp_dates.dt.month == month) & temp_dates.between(window_start, window_end)
    monthly_Temp = greenLandTemp[in_scope]
    x_temp = monthly_Temp['dt']
    y_temp = monthly_Temp['AverageTemperature']

    fig,ax = plt.subplots()
    ax.plot(x_data_mean,y_data_mean,color='r',marker='v')

    # Second y-axis sharing the same x-axis, used for the temperature curve.
    ax2 = ax.twinx()
    ax2.plot(x_temp,y_temp,color='g',marker='o')
    ax2.set_ylabel('Average Temperature (Celcius)',color='g')

    ax.set_xlabel('Date (years)')
    ax.set_ylabel('Sea Ice Extent (km^2)',color='r')
    ax.set_title('Sea Ice extent vs Temperature for %s' %month_name)
    ax.tick_params('y')
    ax.tick_params('x',colors='blue')
    plt.grid(True)
    plt.show()

In [None]:
# Draw the comparison for month number 5 (May).
plot_Temp_Data(5)

# Creating some insightful graphs

In [None]:
# Restrict the temperature records to 1979-01-01 .. 2013-09-01 by slicing on a
# temporary 'dt' index, then restore 'dt' as a regular column.
greenLandTemp = (
    greenLandTemp
    .set_index('dt')['1979-01-01':'2013-09-01']
    .reset_index()
)

In [None]:
# Twin-axis figure for July: yearly mean sea-ice extent (left axis, red) and
# average temperature (right axis, green).
fig, ax = plt.subplots()

# Raw daily July values; only referenced by the disabled overlay below.
x_data, y_data = Julydata['Date'], Julydata['Extent']

# Keep the yearly July means between 1979-07-01 and 2013-07-01.
JulyMeans = JulyMeans.set_index('Date')['1979-07-01':'2013-07-01'].reset_index()
x_data_mean, y_data_mean = JulyMeans['Date'], JulyMeans['Extent']

# Optional overlay of the daily values (disabled):
#ax.plot(x_data,y_data,marker='o',color = 'gray')

ax.plot(x_data_mean, y_data_mean, color='r', marker='v')

# July temperature readings, drawn on a second y-axis that shares the x-axis.
JulyTemp = greenLandTemp[greenLandTemp['dt'].dt.month == 7]
x_temp, y_temp = JulyTemp['dt'], JulyTemp['AverageTemperature']

ax2 = ax.twinx()
ax2.plot(x_temp, y_temp, color='g', marker='o')
ax2.set_ylabel('Average Temperature (Celcius)', color='g')

# Labels and styling for the primary (sea-ice) axis.
ax.set_xlabel('Date (years)')
ax.set_ylabel('Sea Ice Extent (km^2)', color='r')
ax.set_title('Sea Ice extent in the Bafflin Bay')
ax.tick_params('y')
ax.tick_params('x', colors='blue')

plt.grid(True)
plt.show()

In [None]:
# seaborn is imported here rather than in the first cell.
import seaborn as sns
# Scatter plot of y_temp against x_temp, which must already be defined by an earlier cell.
sns.scatterplot(x=x_temp,y=y_temp)