In [1]:
# dependencies
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, join, outerjoin, MetaData, Table


In [2]:
# create engine to hawaii.sqlite
connect_string = "sqlite:///static/data/climateDB.db"

# reflect the tables
engine = create_engine(connect_string) 

# reflect an existing database into a new model
Base = automap_base()

# reflect the tables
Base.prepare(engine, reflect =True)


In [3]:
Base.classes.keys()

['CO2_emission', 'temp_change']

In [4]:
# Save references to each table
Emission = Base.classes.CO2_emission
Temp_change = Base.classes.temp_change

In [5]:
# Create a session (link) from Python to the sqlite DB
session = Session(bind = engine)

#Filter the data for the year >= 1961
results_emission = session.query(Emission).filter(Emission.Year >= 1961)
#print(results) 

In [6]:
emission_df = pd.read_sql(results_emission.statement, session.connection())

print(emission_df.head())
results_temp = session.query(Temp_change)

#print(results) 

temp_df = pd.read_sql(results_temp.statement, session.connection())

selection = ['DecJanFeb', 'MarAprMay', 'JunJulAug', 'SepOctNov']

        Entity Code  Year  AnnualCO2emissions
0  Afghanistan  AFG  1961              490798
1  Afghanistan  AFG  1962              688594
2  Afghanistan  AFG  1963              706736
3  Afghanistan  AFG  1964              838551
4  Afghanistan  AFG  1965             1006917


In [7]:




season_df = temp_df.loc[temp_df["Months"].isin(selection)]

#filter by months
month_df = temp_df.loc[(~temp_df["Months"].isin(selection)) & (temp_df["Months"] != 'Meteorological year') ]

# filter Meteorological year
meteor_df = temp_df.loc[temp_df["Months"] == 'Meteorological year' ]
# Calculate avg temp per Meteorological year
meteor_df['avg_temp']= meteor_df.mean(axis =1)
meteor_id_df = meteor_df.set_index('Area')

session.close() 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [12]:
month_df.head(12)



Unnamed: 0,field1,Area,Months,Element,Unit,1961,1962,1963,1964,1965,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,0,Afghanistan,January,Temperature change,°C,0.777,0.062,2.744,-5.232,1.868,...,3.601,1.179,-0.583,1.233,1.755,1.943,3.416,1.201,1.996,2.951
1,2,Afghanistan,February,Temperature change,°C,-1.743,2.465,3.919,-0.202,-0.096,...,1.212,0.321,-3.201,1.494,-3.187,2.699,2.251,-0.323,2.705,0.086
2,4,Afghanistan,March,Temperature change,°C,0.516,1.336,0.403,1.659,-0.909,...,3.39,0.748,-0.527,2.246,-0.076,-0.497,2.296,0.834,4.418,0.234
3,6,Afghanistan,April,Temperature change,°C,-1.709,0.117,0.919,-0.533,-1.816,...,2.591,1.712,1.417,-0.052,0.585,1.589,0.98,1.252,1.442,0.899
4,8,Afghanistan,May,Temperature change,°C,1.412,-0.092,-0.69,-0.16,-0.19,...,1.419,3.643,0.909,1.201,0.959,1.862,3.246,3.28,0.855,0.647
5,10,Afghanistan,June,Temperature change,°C,-0.058,-1.061,1.164,-0.519,-0.422,...,0.594,2.45,0.305,1.924,1.492,1.289,1.586,2.002,1.786,-0.289
6,12,Afghanistan,July,Temperature change,°C,0.884,0.292,0.348,-0.142,0.116,...,0.494,0.532,0.973,1.126,0.671,1.092,1.079,0.901,1.815,1.885
7,14,Afghanistan,August,Temperature change,°C,0.391,-0.22,0.094,0.671,-0.686,...,0.587,1.93,1.356,0.886,0.706,0.589,0.311,0.102,0.982,0.773
8,16,Afghanistan,September,Temperature change,°C,1.445,-1.797,0.163,-0.253,-0.661,...,0.03,1.408,0.495,2.586,2.268,0.172,3.016,0.93,1.063,2.004
9,18,Afghanistan,October,Temperature change,°C,-1.102,-0.968,1.654,-2.839,2.211,...,2.927,1.339,0.827,1.691,1.134,1.894,0.792,2.092,-0.103,1.264


In [102]:
def get_mean_and_year(df):
    months_country_group_df = month_df.groupby(['Area','Months'],sort=False).mean()

    #Rename and drop field1
    months_country_mean= months_country_group_df.drop('field1', 1)
    
    #get years
    year = months_country_mean.columns
    
    return months_country_mean, year   


In [18]:
country_list = months_country_mean.index
countries = [item[0] for item in country_list]
unique_countries = []
for item in countries:
    if(item not in unique_countries):
        unique_countries.append(item)
unique_countries  

['Afghanistan',
 'Africa',
 'Albania',
 'Algeria',
 'Americas',
 'Andorra',
 'Angola',
 'Anguilla',
 'Annex I countries',
 'Antarctica',
 'Argentina',
 'Asia',
 'Australia',
 'Australia and New Zealand',
 'Austria',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia (Plurinational State of)',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Caribbean',
 'Central African Republic',
 'Central America',
 'Chad',
 'Channel Islands',
 'Chile',
 'China',
 'China, Hong Kong SAR',
 'China, Macao SAR',
 'China, Taiwan Province of',
 'China, mainland',
 'Cocos (Keeling) Islands',
 'Colombia',
 'Congo',
 'Cuba',
 'Cyprus',
 "Côte d'Ivoire",
 "Democratic People's Republic of Korea",
 'Democratic Republic of the Congo',
 'Denmark',
 'Dominica',
 'Dominican Republic',
 'Eastern Africa',
 'Eastern Asia',
 'Eastern Europe',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea

In [103]:
months_country_mean, year = get_mean_and_year(month_df) 

months_list = []
avg_temp_list =[]
unit = 'Months'
for country in unique_countries:
    avg_temp_list.clear()
    country_df = months_country_mean.loc[country,:]
    #print(country_df)
    # Get months for each country
    month_index = country_df.index
    
    months_obj ={
    'Country': country,       
            }
    if len(month_index) > 4:
        months_obj['Year'] =year
        for item in month_index:
            months_obj[item] = country_df.loc[item].values,

        #print(country_df.loc[item].values)
        months_obj["Data Found"] = 'Yes'    
        
    else:
        months_obj["Data Found"] = 'No' 
        #print(f'...............\n Country: {country}\n')
        #print(country_df)
        #print(months_obj)
    
    months_list.append(months_obj)
        
        
       
        
months_list

[{'Country': 'Afghanistan',
  'Year': Index(['1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969',
         '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978',
         '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987',
         '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996',
         '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005',
         '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014',
         '2015', '2016', '2017', '2018', '2019'],
        dtype='object'),
  'January': (array([ 0.777,  0.062,  2.744, -5.232,  1.868,  3.629, -1.432,  0.389,
          -2.298,  0.804, -1.487, -1.305, -2.951, -1.184, -0.49 ,  2.409,
          -3.014, -0.663,  1.141, -0.393,  1.724,  0.678,  0.524, -0.058,
           0.435,  0.332,  2.655,  1.15 , -1.108,  0.634,  0.018,  0.582,
          -0.821,  1.087,  1.297, -0.718,  1.426,  0.95 ,  0.859,  1.565,
          -0.603,  1.6