In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
init_notebook_mode(connected = True)

## Import data anr wrangle

In [2]:
global_temp_country=pd.read_csv('GlobalLandTemperaturesByCountry.csv')
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
#check the shape
global_temp_country.shape

(577462, 4)

In [4]:
#check for missing values
global_temp_country.isna().sum()

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

In [5]:
#drop missing values
global_temp_country.dropna(axis=0, how = 'any', subset = ['AverageTemperature'], inplace = True)

In [6]:
global_temp_country.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [7]:
#get the number of countries in the dataset
global_temp_country['Country'].nunique()

242

In [8]:
#the unique country names are
global_temp_country['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [9]:
#unify country names that mean the same
dict = {'Congo (Democratic Republic Of The)':'Congo',
       'Denmark (Europe)':'Denmark',
       'France (Europe)':'France',
       'Netherlands (Europe)':'Netherlands',
       'United Kingdom (Europe)':'United Kingdom'}

In [10]:
global_temp_country['Country'].replace(dict, inplace=True)

In [14]:
#final dataset
avg_temp=global_temp_country.groupby(['Country'])['AverageTemperature'].mean().to_frame().reset_index()
avg_temp

Unnamed: 0,Country,AverageTemperature
0,Afghanistan,14.045007
1,Africa,24.074203
2,Albania,12.610646
3,Algeria,22.985112
4,American Samoa,26.611965
...,...,...
232,Western Sahara,22.319818
233,Yemen,26.253597
234,Zambia,21.282956
235,Zimbabwe,21.117547


In [15]:
#Import essential libraries
import plotly.express as px
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.graph_objs as go
import plotly.tools as tls

In [16]:
fig = px.choropleth(avg_temp, locations='Country',locationmode='country names', color = 'AverageTemperature')
fig.update_layout(title = 'Choropleth Map of Average Temperature')
fig.show()

### The above is a visualisation of the current average temperatures across the globe. Temperatures drop towards the poler regions while the central regions are hotter due to the fact that that is the path of the sun.

## Investigating the existence of Global warming

In [17]:
global_temp=pd.read_csv('GlobalTemperatures.csv')
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [18]:
#we only require the year
global_temp['dt'][0].split('-')[0]

'1750'

In [19]:
#function to extract the year from all the dates
def fetch_year(date):
    return date.split('-')[0]

In [20]:
fetch_year('1759-4-1')

'1759'

In [21]:
global_temp['Years'] = global_temp['dt'].apply(fetch_year)

In [22]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Years
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [23]:
#group the average land temp and land temperature uncertaity on the basis of year

In [24]:
data = global_temp.groupby('Years').agg({'LandAverageTemperature':'mean','LandAverageTemperatureUncertainty':'mean'}).reset_index()
data.head()

Unnamed: 0,Years,LandAverageTemperature,LandAverageTemperatureUncertainty
0,1750,8.719364,2.637818
1,1751,7.976143,2.781143
2,1752,5.779833,2.977
3,1753,8.388083,3.176
4,1754,8.469333,3.49425


In [25]:
#first, we need to calculate the uncertainty values whch is a range of posssible values within which the actual temperature lies
data['Uncertainty Top'] = data['LandAverageTemperature'] + data['LandAverageTemperatureUncertainty']
data['Uncertainty Bottom'] = data['LandAverageTemperature'] - data['LandAverageTemperatureUncertainty']

data.head()

Unnamed: 0,Years,LandAverageTemperature,LandAverageTemperatureUncertainty,Uncertainty Top,Uncertainty Bottom
0,1750,8.719364,2.637818,11.357182,6.081545
1,1751,7.976143,2.781143,10.757286,5.195
2,1752,5.779833,2.977,8.756833,2.802833
3,1753,8.388083,3.176,11.564083,5.212083
4,1754,8.469333,3.49425,11.963583,4.975083


In [26]:
data.columns

Index(['Years', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'Uncertainty Top', 'Uncertainty Bottom'],
      dtype='object')

In [27]:
#Lets use line plot available in the plotly lib
fig = px.line(data, x='Years', y=['LandAverageTemperature','Uncertainty Top','Uncertainty Bottom'])
fig.show()

#### The line plot above shows us a rising temperature as well as an increasing correlation between uncertainty and the temperature figure. This is because, overtime, there has been development in measuring atmospheric temperature reducing the margin for error. We begin to notice a sharp rise between 1960 and 2014 when measurements were very accurate.

### analyse average temperature in each season

In [28]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Years
0,1750-01-01,3.034,3.574,,,,,,,1750
1,1750-02-01,3.083,3.702,,,,,,,1750
2,1750-03-01,5.626,3.076,,,,,,,1750
3,1750-04-01,8.49,2.451,,,,,,,1750
4,1750-05-01,11.573,2.072,,,,,,,1750


In [29]:
#use the months value in dt to get seasons

In [30]:
global_temp['dt'].dtype

dtype('O')

In [31]:
#convert to date time

In [32]:
global_temp['dt']=pd.to_datetime(global_temp['dt'])

In [33]:
global_temp['Month'] = global_temp['dt'].dt.month

In [34]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Years,Month
0,1750-01-01,3.034,3.574,,,,,,,1750,1
1,1750-02-01,3.083,3.702,,,,,,,1750,2
2,1750-03-01,5.626,3.076,,,,,,,1750,3
3,1750-04-01,8.49,2.451,,,,,,,1750,4
4,1750-05-01,11.573,2.072,,,,,,,1750,5


In [35]:
#create a function to extract seasons

In [36]:
def get_season(month):
    if month>=3 and month<6:
        return 'Spring'
    elif month>5 and month<9:
        return 'Summer'
    elif month>8 and month<12:
        return 'Autumn'
    else:
        return 'Winter'

In [37]:
global_temp['Seasons'] = global_temp['Month'].apply(get_season)

In [38]:
global_temp.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty,Years,Month,Seasons
0,1750-01-01,3.034,3.574,,,,,,,1750,1,Winter
1,1750-02-01,3.083,3.702,,,,,,,1750,2,Winter
2,1750-03-01,5.626,3.076,,,,,,,1750,3,Spring
3,1750-04-01,8.49,2.451,,,,,,,1750,4,Spring
4,1750-05-01,11.573,2.072,,,,,,,1750,5,Spring


In [39]:
#checking the unique years

In [40]:
years = global_temp['Years'].unique()

In [41]:
spring_temps=[]
summer_temps=[]
autumn_temps=[]
winter_temps=[]

In [42]:
for year in years:
    current_df = global_temp[global_temp['Years']==year]
    spring_temps.append(current_df[current_df['Seasons']=='Spring']['LandAverageTemperature'].mean())
    summer_temps.append(current_df[current_df['Seasons']=='Summer']['LandAverageTemperature'].mean())
    autumn_temps.append(current_df[current_df['Seasons']=='Autumn']['LandAverageTemperature'].mean())
    winter_temps.append(current_df[current_df['Seasons']=='Winter']['LandAverageTemperature'].mean())

In [43]:
spring_temps

[8.563,
 6.734999999999999,
 7.035499999999999,
 8.627333333333334,
 9.074333333333334,
 8.583666666666666,
 9.466,
 8.604666666666667,
 6.896666666666666,
 6.897333333333333,
 6.653666666666666,
 8.915999999999999,
 7.809333333333332,
 6.715999999999998,
 8.192,
 8.868666666666668,
 8.432333333333332,
 7.831,
 6.144000000000001,
 8.803333333333333,
 7.132000000000001,
 6.0523333333333325,
 7.148666666666666,
 8.866999999999999,
 10.607,
 9.036666666666667,
 7.522333333333333,
 7.774333333333334,
 8.957999999999998,
 10.370666666666667,
 11.737666666666664,
 7.599,
 7.390999999999998,
 8.397333333333334,
 7.3580000000000005,
 6.173666666666667,
 8.849666666666666,
 7.9576666666666656,
 8.159333333333333,
 7.782999999999999,
 6.997333333333333,
 7.9253333333333345,
 7.914666666666666,
 8.248,
 9.146333333333333,
 8.552,
 7.507666666666666,
 7.024333333333334,
 8.953333333333333,
 8.041666666666666,
 8.224666666666666,
 8.660666666666666,
 7.760333333333333,
 8.653666666666666,
 8.863,
 

In [44]:
season = pd.DataFrame()

In [45]:
season['year']=years
season['spring_temps']=spring_temps
season['summer_temps']=summer_temps
season['autumn temps']=autumn_temps
season['winter temps']=winter_temps

In [46]:
season.head()

Unnamed: 0,year,spring_temps,summer_temps,autumn temps,winter temps
0,1750,8.563,14.518333,8.89,2.963
1,1751,6.735,14.116,10.673,1.729
2,1752,7.0355,,7.587,2.717
3,1753,8.627333,14.608333,9.212333,1.104333
4,1754,9.074333,14.208333,8.957333,1.637333


In [47]:
season.columns

Index(['year', 'spring_temps', 'summer_temps', 'autumn temps', 'winter temps'], dtype='object')

In [49]:
fig2 = px.line(season, x = 'year', y = ['spring_temps', 'summer_temps', 'autumn temps', 'winter temps'], title = 'Average Temperature in each Season')
fig2.show()

### We also see that sharp ascension from 1960 upwards


## Temperature trends in top economies

In [52]:
TopEconomies = ['Russia', 'United States', 'China', 'Japan', 'Australia', 'India', 'Nigeria']

In [53]:
global_temp_country.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
5,1744-04-01,1.53,4.68,Åland
6,1744-05-01,6.702,1.789,Åland
7,1744-06-01,11.609,1.577,Åland
8,1744-07-01,15.342,1.41,Åland


In [50]:
#check the listed countries in the dataframe

In [60]:
TopEco_df = global_temp_country[global_temp_country['Country'].isin(TopEconomies)]
TopEco_df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
34816,1852-07-01,14.116,1.53,Australia
34817,1852-08-01,15.33,1.4,Australia
34818,1852-09-01,18.74,1.446,Australia
34819,1852-10-01,21.984,1.493,Australia
34820,1852-11-01,24.073,1.466,Australia


In [61]:
#fetch year using the fetch_year function already defined

In [62]:
TopEco_df['Years'] = TopEco_df['dt'].apply(fetch_year)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [63]:
avg_temp = TopEco_df.groupby(['Years', 'Country']).agg({'AverageTemperature':'mean'}).reset_index()
avg_temp.head()

Unnamed: 0,Years,Country,AverageTemperature
0,1768,United States,5.57275
1,1769,United States,10.4465
2,1774,United States,1.603
3,1775,United States,9.499167
4,1776,United States,8.11


In [64]:
fig = px.line(avg_temp, x='Years', y='AverageTemperature', color='Country', title = 'Average Temperature in the World')
fig.show()

### Of all the top economies, Nigeria came out tops in average temperature

## AVERAGE TEMPERATURE OF USA STATES

In [65]:
global_temp_state = pd.read_csv('GlobalLandTemperaturesByState.csv')
global_temp_state.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [97]:
filter = global_temp_state['Country']=='United States'
USA = global_temp_state[filter]
USA.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
7458,1743-11-01,10.722,2.898,Alabama,United States
7459,1743-12-01,,,Alabama,United States
7460,1744-01-01,,,Alabama,United States
7461,1744-02-01,,,Alabama,United States
7462,1744-03-01,,,Alabama,United States


In [98]:
USA.dropna(inplace = True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [99]:
USA['State'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District Of Columbia',
       'Florida', 'Georgia (State)', 'Hawaii', 'Idaho', 'Illinois',
       'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
       'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
       'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)

In [100]:
#replacing incorrect names
state = {'Georgia (State)':'Georgia','District Of Columbia':'Columbia' }

In [101]:
USA['State'].replace(state, inplace = True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [102]:
USA.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
7458,1743-11-01,10.722,2.898,Alabama,United States
7463,1744-04-01,19.075,2.902,Alabama,United States
7464,1744-05-01,21.197,2.844,Alabama,United States
7465,1744-06-01,25.29,2.879,Alabama,United States
7466,1744-07-01,26.42,2.841,Alabama,United States


In [103]:
USA = USA[['AverageTemperature', 'State']]
USA.head()

Unnamed: 0,AverageTemperature,State
7458,10.722,Alabama
7463,19.075,Alabama
7464,21.197,Alabama
7465,25.29,Alabama
7466,26.42,Alabama


In [104]:
#group according to state
USA_temp = USA.groupby('State')['AverageTemperature'].mean().reset_index()
USA_temp.head()

Unnamed: 0,State,AverageTemperature
0,Alabama,17.066138
1,Alaska,-4.890738
2,Arizona,15.381526
3,Arkansas,15.573963
4,California,14.327677


In [105]:
!pip install opencage



In [106]:
from opencage.geocoder import OpenCageGeocode

In [107]:
key = '77519d6190ae41ea8ca1cbe9b0bd28db'

In [108]:
geocoder = OpenCageGeocode(key)

In [109]:
location = 'Abuja, Nigeria'
results = geocoder.geocode(location)
results

[{'annotations': {'DMS': {'lat': "9° 3' 51.58980'' N",
    'lng': "7° 29' 21.47064'' E"},
   'MGRS': '32PLR3396302309',
   'Maidenhead': 'JJ39rb85rk',
   'Mercator': {'x': 833704.773, 'y': 1006545.107},
   'OSM': {'edit_url': 'https://www.openstreetmap.org/edit?node=31203257#map=17/9.06433/7.48930',
    'note_url': 'https://www.openstreetmap.org/note/new#map=17/9.06433/7.48930&layers=N',
    'url': 'https://www.openstreetmap.org/?mlat=9.06433&mlon=7.48930#map=17/9.06433/7.48930'},
   'UN_M49': {'regions': {'AFRICA': '002',
     'NG': '566',
     'SUB-SAHARAN_AFRICA': '202',
     'WESTERN_AFRICA': '011',
     'WORLD': '001'},
    'statistical_groupings': ['LEDC']},
   'callingcode': 234,
   'currency': {'alternate_symbols': [],
    'decimal_mark': '.',
    'html_entity': '&#x20A6;',
    'iso_code': 'NGN',
    'iso_numeric': '566',
    'name': 'Nigerian Naira',
    'smallest_denomination': 50,
    'subunit': 'Kobo',
    'subunit_to_unit': 100,
    'symbol': '₦',
    'symbol_first': 1,
  

In [110]:
#to access latitude and longitude
results[0]['geometry']['lat']

9.0643305

In [111]:
results[0]['geometry']['lng']

7.4892974

In [112]:
list_lat = []
list_long = []

for state in USA_temp['State']:
    results = geocoder.geocode(state)
    lat = results[0]['geometry']['lat']
    long = results[0]['geometry']['lng']
    
    list_lat.append(lat)
    list_long.append(long)

In [113]:
USA_temp['Latitude'] = list_lat
USA_temp['Longitude'] = list_long

In [114]:
USA_temp.head()

Unnamed: 0,State,AverageTemperature,Latitude,Longitude
0,Alabama,17.066138,33.258882,-86.829534
1,Alaska,-4.890738,64.445961,-149.680909
2,Arizona,15.381526,34.395342,-111.763275
3,Arkansas,15.573963,35.204888,-92.447911
4,California,14.327677,36.701463,-118.755997


In [115]:
#Import folium to create heatmaps

In [116]:
import folium
from folium.plugins import HeatMap

In [117]:
#basemap of folium
basemap = folium.Map()

In [118]:
#select the three columns required
HeatMap(USA_temp[['Latitude', 'Longitude', 'AverageTemperature']]).add_to(basemap)
basemap

## ANALYSE AVERAGE TEMPERATURE OF MAJOR INDIAN CITIES BY MONTH

In [119]:
cities = pd.read_csv('GlobalLandTemperaturesByCity.csv')
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [120]:
#fliter India out
India = cities[cities['Country']=='India']
India.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
49880,1816-03-01,19.934,2.258,Abohar,India,29.74N,73.85E
49881,1816-04-01,26.641,3.398,Abohar,India,29.74N,73.85E
49882,1816-05-01,32.535,2.408,Abohar,India,29.74N,73.85E
49883,1816-06-01,33.254,2.123,Abohar,India,29.74N,73.85E
49884,1816-07-01,31.105,1.848,Abohar,India,29.74N,73.85E


In [121]:
India.shape

(1014906, 7)

In [122]:
India['City'].unique()

array(['Abohar', 'Achalpur', 'Adilabad', 'Adoni', 'Agartala', 'Agra',
       'Ahmadabad', 'Ahmadnagar', 'Aizawl', 'Ajmer', 'Akola', 'Alandur',
       'Alappuzha', 'Aligarh', 'Allahabad', 'Alwar', 'Ambala',
       'Ambarnath', 'Ambattur', 'Ambur', 'Amravati', 'Amritsar', 'Amroha',
       'Anand', 'Anantapur', 'Ara', 'Asansol', 'Aurangabad', 'Avadi',
       'Azamgarh', 'Badlapur', 'Bagaha', 'Bahadurgarh', 'Baharampur',
       'Bahraich', 'Baidyabati', 'Baleshwar', 'Ballia', 'Bally',
       'Balurghat', 'Banda', 'Bangalore', 'Bangaon', 'Bankura',
       'Bansbaria', 'Barakpur', 'Baranagar', 'Barasat', 'Barddhaman',
       'Bareli', 'Baripada', 'Barnala', 'Barsi', 'Basirhat', 'Basti',
       'Batala', 'Beawar', 'Begusarai', 'Belgaum', 'Bellary', 'Bettiah',
       'Bhadravati', 'Bhadreswar', 'Bhagalpur', 'Bharatpur', 'Bharuch',
       'Bhatpara', 'Bhavnagar', 'Bhilai', 'Bhilwara', 'Bhimavaram',
       'Bhind', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bhuj',
       'Bhusawal', 'Bid',

In [123]:
#We consider 7 major cities
cities = ['New Delhi', 'Bangalore', 'Hyderabad', 'Pune', 'Madras', 'Varanasi', 'Gurgaon']
cities = India[India['City'].isin(cities)]
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
630113,1796-01-01,22.672,2.317,Bangalore,India,12.05N,77.26E
630114,1796-02-01,24.42,1.419,Bangalore,India,12.05N,77.26E
630115,1796-03-01,26.092,2.459,Bangalore,India,12.05N,77.26E
630116,1796-04-01,27.687,1.746,Bangalore,India,12.05N,77.26E
630117,1796-05-01,27.619,1.277,Bangalore,India,12.05N,77.26E


In [124]:
#strip off the extra N and E in the cordinates
cities['Latitude'] = cities['Latitude'].str.strip('N')
cities['Longitude'] = cities['Longitude'].str.strip('E')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [125]:
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
630113,1796-01-01,22.672,2.317,Bangalore,India,12.05,77.26
630114,1796-02-01,24.42,1.419,Bangalore,India,12.05,77.26
630115,1796-03-01,26.092,2.459,Bangalore,India,12.05,77.26
630116,1796-04-01,27.687,1.746,Bangalore,India,12.05,77.26
630117,1796-05-01,27.619,1.277,Bangalore,India,12.05,77.26


In [126]:
#to extract month, convert dt to datetime
cities['dt'] = pd.to_datetime(cities['dt'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [127]:
cities['Month'] = cities['dt'].dt.month



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [128]:
cities.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,Month
630113,1796-01-01,22.672,2.317,Bangalore,India,12.05,77.26,1
630114,1796-02-01,24.42,1.419,Bangalore,India,12.05,77.26,2
630115,1796-03-01,26.092,2.459,Bangalore,India,12.05,77.26,3
630116,1796-04-01,27.687,1.746,Bangalore,India,12.05,77.26,4
630117,1796-05-01,27.619,1.277,Bangalore,India,12.05,77.26,5


In [129]:
#group on the basis of month and city and then access average temp
#alternative cities.groupby(['Month', 'City']).agg({'AverageTemperature':'mean'}).reset_index()
cities_temp = cities.groupby(['Month', 'City'])['AverageTemperature'].mean().reset_index()

In [130]:
cities_temp.head()

Unnamed: 0,Month,City,AverageTemperature
0,1,Bangalore,22.713981
1,1,Gurgaon,14.23856
2,1,Hyderabad,22.75829
3,1,Madras,24.346733
4,1,New Delhi,14.23856


In [131]:
#manipulate the columns
cities_temp.columns = ['month', 'City', 'Mean_temp']
cities_temp.head()

Unnamed: 0,month,City,Mean_temp
0,1,Bangalore,22.713981
1,1,Gurgaon,14.23856
2,1,Hyderabad,22.75829
3,1,Madras,24.346733
4,1,New Delhi,14.23856


In [132]:
#we have to get coordinates by merging with the cities dataframe
df = cities_temp.merge(cities, on= 'City')
df

Unnamed: 0,month,City,Mean_temp,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Latitude,Longitude,Month
0,1,Bangalore,22.713981,1796-01-01,22.672,2.317,India,12.05,77.26,1
1,1,Bangalore,22.713981,1796-02-01,24.420,1.419,India,12.05,77.26,2
2,1,Bangalore,22.713981,1796-03-01,26.092,2.459,India,12.05,77.26,3
3,1,Bangalore,22.713981,1796-04-01,27.687,1.746,India,12.05,77.26,4
4,1,Bangalore,22.713981,1796-05-01,27.619,1.277,India,12.05,77.26,5
...,...,...,...,...,...,...,...,...,...,...
219487,12,Varanasi,16.246505,2013-05-01,34.333,0.589,India,24.92,83.35,5
219488,12,Varanasi,16.246505,2013-06-01,32.044,1.033,India,24.92,83.35,6
219489,12,Varanasi,16.246505,2013-07-01,30.281,0.420,India,24.92,83.35,7
219490,12,Varanasi,16.246505,2013-08-01,29.364,0.269,India,24.92,83.35,8


In [133]:
#remove duplcates
data = df.drop_duplicates(subset = ['month', 'City'])
data.head()

Unnamed: 0,month,City,Mean_temp,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Latitude,Longitude,Month
0,1,Bangalore,22.713981,1796-01-01,22.672,2.317,India,12.05,77.26,1
2613,2,Bangalore,24.656619,1796-01-01,22.672,2.317,India,12.05,77.26,1
5226,3,Bangalore,27.062186,1796-01-01,22.672,2.317,India,12.05,77.26,1
7839,4,Bangalore,27.988517,1796-01-01,22.672,2.317,India,12.05,77.26,1
10452,5,Bangalore,27.522754,1796-01-01,22.672,2.317,India,12.05,77.26,1


In [134]:
#select important columns from data
data2 = data[['month', 'City', 'Mean_temp', 'Country', 'Latitude', 'Longitude']]
data2.head()

Unnamed: 0,month,City,Mean_temp,Country,Latitude,Longitude
0,1,Bangalore,22.713981,India,12.05,77.26
2613,2,Bangalore,24.656619,India,12.05,77.26
5226,3,Bangalore,27.062186,India,12.05,77.26
7839,4,Bangalore,27.988517,India,12.05,77.26
10452,5,Bangalore,27.522754,India,12.05,77.26


In [135]:
import plotly.graph_objs as go

In [136]:
data = [go.Heatmap(x = data2['month'], y = data2['City'], z = data2['Mean_temp'])]

In [137]:
#you need your alyout as well
layout = go.Layout(title = 'Average Monthly Temperature of Major Cities in India')

In [138]:
fig = go.Figure(data = data, layout = layout)
fig.show()

## April to September have the highest temperarures across India coinciding with the Summer and Autumn seasons