In [None]:
class Climate:
    ## CLASS DEFINITION
    # -------------------------------------------------------------------------------------------------------------- #
    # INITIALIZATION FUNCTION
    # associates the data collected from the "GlobalLandTemperatures_GlobalLandTemperaturesByMajorCity.csv" file
    def __init__(self,year,average_temp, average_unc, city, country,latitude,longitude):
        self.measure_year = year
        self.avg_temp = average_temp
        self.avg_unc = average_unc
        self.city = city
        self.country = country
        self.latitude = latitude
        self.longitude = longitude   
    # -------------------------------------------------------------------------------------------------------------- #
    # FIND HOW MANY COUNTRIES ARE STUDIED IN "GlobalLandTemperatures_GlobalLandTemperaturesByMajorCity.csv"
    # the file has measurements per month for an interval of years, which means that the same country appears
    # several times. Therefore we have to eliminate duplicates to find how many countries are studied
    def analyse_countries(self, countries):
        # eliminate duplicate entries from "countries" and store them into "countries_transformed"
        countries_transformed = list(dict.fromkeys(countries))
        # print message with how many countries are studied
        print('There are {} countries in the data base'.format(len(countries_transformed)))
        # return the list with each country in the study (without duplicates)
        return countries_transformed
    
    # -------------------------------------------------------------------------------------------------------------- #
    # GROUP COUNTRIES PRESENT IN "GlobalLandTemperatures_GlobalLandTemperaturesByMajorCity.csv" BY CONTINENT
    # it will be important to know which continent every country belongs to
    def locate_countries(self, cities):
        # initialize variables - one list per continent
        asia = []
        africa = []
        oceania = []
        north_america = []
        south_america = []
        europe = []
        population = []
        gdp=[]
        # import a file that has information abaout every city. The file has COUNTRY, CONTINENT, POPULATION and GDP PER CAPITA
        # this will tell to which continent every country belongs to
        import csv
        # the file is called "worldcities.csv" and the enconding is a PRE-DEFININITION (DO NOT CHANGE IT)
        with open('worldcities.csv',  encoding='utf-8') as data_countries:
            # read the data of the file into the "dataset_countries" variable
            dataset_countries = csv.DictReader(data_countries)
            # eliminate duplicate cities - as the data is per year, the same country will appear several times
            cities_transformed = list(dict.fromkeys(cities))
            # iterate through every contry in the imported file
            for row in dataset_countries:
                # iterate through every city that we have measurements for
                for city in cities_transformed:
                    # if the city that we have measurements to appears in the "worldcities.csv" file
                    if city == row['city']:
                        # if the country is associated with Africa in "worldcities.csv"
                        if row['continent'] == 'Africa' and (row['country'] in africa) == False:
                            # add that country to the list "africa"
                            africa.append(row['country'])
                        #process repeats for every continent
                        elif row['continent'] == 'Europe' and (row['country'] in europe) == False:
                            europe.append(row['country'])
                        elif row['continent'] == 'Asia' and (row['country'] in asia) == False:
                            asia.append(row['country']) 
                        elif row['continent'] == 'Oceania' and (row['country'] in oceania) == False:
                            oceania.append(row['country'])
                        elif row['continent'] == 'South America' and (row['country'] in south_america) == False:
                            south_america.append(row['country'])
                        elif row['continent'] == 'North America' and (row['country'] in north_america) == False:
                            north_america.append(row['country'])
                    gdp.append(row['gdp_per_capita_2020'])
                    population.append(row['population'])
            
            # returns the lists that have the countries per continent
            return asia, africa, europe, oceania, north_america, south_america, gdp, population
    
    # -------------------------------------------------------------------------------------------------------------- #
    # PLOT THE DISTRIBUTION OF COUNTRIES PER CONTINENT IN THIS STUDY
    # which continent is best represented must be taken into account in the final analysis
    def plot_countries(self, europe, asia, africa, oceania, north_america, south_america, countries_transformed):
        """Show a bar chart of countries per continent and print each continent's share.

        Args:
            europe, asia, africa, oceania, north_america, south_america:
                sized collections; only their lengths are used.
            countries_transformed: sized collection whose length is the
                denominator of the printed percentages.

        Returns:
            None. The chart is displayed with ``plt.show()`` and six summary
            lines are printed. Percentages are truncated with ``int``.
        """
        # matplotlib is needed only for this chart
        import matplotlib.pyplot as plt

        # (x-axis label, name used in the printed summary, number of countries)
        summary = [
            ('Europe', 'Europe', len(europe)),
            ('Asia', 'Asia', len(asia)),
            ('Africa', 'Africa', len(africa)),
            ('Oceania', 'Oceania', len(oceania)),
            ('North Am.', 'North America', len(north_america)),
            ('South Am.', 'South America', len(south_america)),
        ]
        tick_labels = [label for label, _, _ in summary]
        counts = [count for _, _, count in summary]

        # bar chart: continents on the x axis, number of countries on the y axis
        figure = plt.figure()
        axes = figure.add_axes([0, 0, 1, 1])
        axes.bar(tick_labels, counts)
        axes.set_ylim([0, 20])
        plt.title("Representation of continents in study")
        plt.xlabel("Continents")
        plt.ylabel("Number of Countries represented")

        # value label above each bar (the -0.1 / +0.5 offsets were found by trial)
        for position, count in enumerate(counts):
            plt.annotate(count, (-0.1 + position, count + 0.5))
        plt.show()

        # share of each continent among all countries studied
        for _, full_name, count in summary:
            share = int(count / len(countries_transformed) * 100)
            print(full_name + ' represents ' + str(share) + '% of the countries studied')
        
    # -------------------------------------------------------------------------------------------------------------- #
    # BUILD ONE RECORD PER CITY WITH ITS FIRST AND LAST MEASUREMENT
    def organize_dict_data(self,years,average_temp, average_unc, city, countries_list ,latitude,longitude, population, gdp):
        # initialize variables - one list per continent
        individual_data ={}
        specific_data =[]
        data_final = {}
        j=0
        for i in range(len(city)):
            if i == 0 or city[i] != city[i-1]:
                individual_data['city'] = city[i]
                individual_data['country'] = countries_list[i]
                individual_data['gdp'] = gdp[j]
                individual_data['population'] = population[j]
                individual_data['longitude'] = longitude[i]
                individual_data['latitude'] = latitude[i]
                individual_data['Year 1st Measurement'] = years [i]
                individual_data['First Measure'] = average_temp[i]
                individual_data['First Measure Uncertainty'] = average_temp_unc[i]
                j += 1
            elif i == len(city)-1 or city[i] != city[i+1]:
                individual_data['Year last Measurement'] = years [i]
                individual_data['Last Measure'] = average_temp[i]
                individual_data['Last Measure Uncertainty'] = average_temp_unc[i]
                specific_data.append(individual_data)
                data_final[city[i]] = specific_data
                specific_data = []
                individual_data ={}
        return data_final
    
    def analyse_data_final(self,data_final,cities):
        """Plot, tabulate and summarise the temperature change of every city.

        For each distinct entry of ``cities`` (first-appearance order) the
        change is ``'Last Measure' - 'First Measure'`` of
        ``data_final[city][0]``; it is also written back into that record
        under ``'Delta Temp'``.

        Args:
            data_final: dict mapping a city name to a list whose first item is
                a record dict with the keys ``city``, ``country``,
                ``Year 1st Measurement``, ``First Measure``,
                ``Year last Measurement`` and ``Last Measure``.
            cities: city names, possibly repeated.

        Returns:
            None. A bar chart is shown, then a table, the mean change and the
            cities with the lowest and highest change are printed.
        """
        import matplotlib.pyplot as plt
        import numpy as np  # imported by this method but not referenced below
        from tabulate import tabulate

        cities_transformed = list(dict.fromkeys(cities))

        # temperature change per city, accumulated one by one for the mean
        deltas = []
        running_total = 0
        for name in cities_transformed:
            record = data_final[name][0]
            change = record['Last Measure'] - record['First Measure']
            deltas.append(change)
            record['Delta Temp'] = change
            running_total = running_total + change
        city_count = len(cities_transformed)

        # bar plot of the change per city
        plt.figure(figsize = (20, 5))
        plt.bar(list(cities_transformed), deltas, color ='maroon',align = 'center', width = 0.20)
        plt.xticks(rotation=90)
        plt.xlabel("Cities analysed")
        plt.ylabel("Delta T (ºC)")
        plt.title("Evolution of surface temperature around the globe")
        plt.show()

        # table with one row per city
        columns = ('city', 'Year 1st Measurement', 'First Measure',
                   'Year last Measurement', 'Last Measure', 'Delta Temp')
        rows = [[data_final[name][0][key] for key in columns] for name in cities_transformed]
        table_headers = ["City", "Year 1st Temperature", "1st Measurement",
                         "Year Last Temperature", 'Last Measurement', 'Delta Temp']
        print (tabulate(rows, headers=table_headers))

        print('\n The overall temperature change was: ' + str(round((running_total/city_count),3)) + ' ºC')

        # extremes (first occurrence wins on ties)
        highest = max(deltas)
        highest_at = deltas.index(highest)
        lowest = min(deltas)
        lowest_at = deltas.index(lowest)
        for label, value, position in (('lowest', lowest, lowest_at), ('highest', highest, highest_at)):
            where = cities_transformed[position]
            print('\n The city with ' + label + ' delta temperature is ' + where + ' in ' +
                  data_final[where][0]['country'] + ' and it has a delta T of: ' + str(round(value,3))
                  + ' ºC')
            