#Feature: Read data from an external file, such as text, JSON, CSV, etc and use that data in your application
#Feature: Create and call at least 3 functions or methods, at least one of which must return a value that is used somewhere else in your code.
#Feature: Use pandas, matplotlib, and/or numpy to perform a data analysis project. Ingest 2 or more pieces of data, analyze that data in some manner, and display a new result to a graph, chart, or other display
#Feature: Create a dictionary or list, populate it with several values, retrieve at least one value, and use it in your program
#Feature: Visualize data in a graph, chart, or other visual representation of data

In [54]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [55]:
# Load the country lookup table (one row per NOC code) from country_definitions.csv.
# The CSV's 'region' column is renamed to 'Country'; the 'NOC' column is left as-is.
df_countries = pd.read_csv('data/country_definitions.csv').rename(
    columns={'region': 'Country'}
)


df_countries.head() #TEST

Unnamed: 0,NOC,Country,notes
0,AFG,Afghanistan,
1,AHO,Curacao,Netherlands Antilles
2,ALB,Albania,
3,ALG,Algeria,
4,AND,Andorra,


In [56]:
# Read one medal CSV file and reduce it to the columns used by the analysis
def import_data(file_name, season):
    """Load a medal CSV and return a trimmed, season-tagged DataFrame.

    Args:
        file_name: Path of the CSV file to read. It must provide the
            'Year', 'Discipline', 'Event', 'Country' and 'Medal' columns.
        season: Value written to every row of the new 'Season' column.

    Returns:
        A DataFrame with the columns 'Year', 'NOC', 'Sport', 'Medal' and
        'Season', where 'NOC' is the CSV's 'Country' column renamed and
        'Sport' is the text "<Discipline>-<Event>".
    """
    raw = pd.read_csv(file_name)

    # Describe each event as "<Discipline>-<Event>"; this replaces any
    # 'Sport' column that the file itself may contain.
    event_label = raw["Discipline"].astype(str) + "-" + raw["Event"].astype(str)

    labelled = raw.assign(Season=season, Sport=event_label)
    labelled = labelled.rename(columns={'Country': 'NOC'})

    wanted_columns = ['Year', 'NOC', 'Sport', 'Medal', 'Season']
    return labelled[wanted_columns]


# Load both medal tables, tagging every row with the season it came from
df1 = import_data(file_name="data/summer.csv", season="summer")
df2 = import_data(file_name="data/winter.csv", season="winter")

df1.head(n=2) #TEST

Unnamed: 0,Year,NOC,Sport,Medal,Season
0,1896,AUT,Swimming-100M Freestyle,Silver,summer
1,1896,AUT,Swimming-400M Freestyle,Gold,summer


In [57]:
# Sorting helper used to order the dataframes before they are joined

def sort_funct(df, col):
    """Return a new DataFrame holding the rows of ``df`` sorted by ``col``.

    Args:
        df: DataFrame to sort; it is not modified.
        col: Column label (or list of labels) to sort by, ascending.

    Returns:
        The sorted DataFrame, with each row keeping its original index label.
    """
    return df.sort_values(by=col)

# Order every frame by NOC code ahead of the joins
df1_sorted = sort_funct(df1, 'NOC')
df2_sorted = sort_funct(df2, 'NOC')
df_countries_sorted = sort_funct(df_countries, 'NOC')


# Attach the country lookup to each season's medals with an outer join on NOC.
# An outer join keeps NOC codes that appear in only one of the two frames; the
# columns coming from the other frame are NaN on those rows.
combined_df1 = pd.merge(df1_sorted, df_countries, on='NOC', how='outer')

# Selection() reads the winter table under the name combined_df2, so the merge
# result must be bound to that name; previously it was only stored as
# combined_df, which made every winter query fail with a NameError.
combined_df2 = pd.merge(df2_sorted, df_countries, on='NOC', how='outer')
combined_df = combined_df2  # original name kept as an alias for existing uses

combined_df1.tail(200) #TEST

Unnamed: 0,Year,NOC,Sport,Medal,Season,Country,notes
16685,2016.0,VIE,"Shooting-Free Pistol, 50 metres",Silver,summer,Vietnam,
16686,1964.0,YUG,Artistic G.-Horizontal Bar,Bronze,summer,Serbia,Yugoslavia
16687,1964.0,YUG,Artistic G.-Pommel Horse,Gold,summer,Serbia,Yugoslavia
16688,1980.0,YUG,Handball-Handball,Silver,summer,Serbia,Yugoslavia
16689,1972.0,YUG,Handball-Handball,Gold,summer,Serbia,Yugoslavia
...,...,...,...,...,...,...,...
16880,,VNM,,,,Vietnam,
16881,,WIF,,,,Trinidad,West Indies Federation
16882,,YAR,,,,Yemen,North Yemen
16883,,YEM,,,,Yemen,


In [58]:
# Function to choose the season of the Olympic games and the medal type

def Selection(season, medal):
    """Return the rows of one season's merged medal table for one medal type.

    Args:
        season: 'summer' or 'winter' (case-insensitive). 'summer' reads the
            module-level ``combined_df1`` and 'winter' reads ``combined_df2``.
        medal: 'gold', 'silver' or 'bronze' (case-insensitive).

    Returns:
        A DataFrame with the rows of the chosen table whose 'Medal' column
        equals "Gold", "Silver" or "Bronze" respectively. Rows keep their
        original index labels.

    Raises:
        ValueError: If ``season`` or ``medal`` is not one of the values above.
    """
    season_key = str(season).strip().lower()
    medal_key = str(medal).strip().lower()

    # Fail with a clear message instead of reaching the return statement with
    # no result assigned, which is what an unmatched if/elif chain would do.
    if season_key not in ('summer', 'winter'):
        raise ValueError(
            f"season must be 'summer' or 'winter', got {season!r}"
        )
    if medal_key not in ('gold', 'silver', 'bronze'):
        raise ValueError(
            f"medal must be 'gold', 'silver' or 'bronze', got {medal!r}"
        )

    # Only the requested season's table is looked up, so a summer query does
    # not depend on the winter table being defined (and vice versa).
    source = combined_df1 if season_key == 'summer' else combined_df2

    # The 'Medal' column stores capitalised names: "Gold", "Silver", "Bronze".
    medal_name = medal_key.capitalize()
    return source[source['Medal'] == medal_name]

# TEST: run Selection for summer silver medals and show the first five rows
top_countries = Selection(season='summer', medal='silver')

top5 = top_countries.iloc[:5]
top5

Unnamed: 0,Year,NOC,Sport,Medal,Season,Country,notes
2,1988.0,AHO,Sailing-Board (Division Ii),Silver,summer,Curacao,Netherlands Antilles
4,2000.0,ALG,Athletics-5000M,Silver,summer,Algeria,
5,2008.0,ALG,Judo-81 - 90KG (Middleweight),Silver,summer,Algeria,
6,2016.0,ALG,"Athletics-1,500 metres",Silver,summer,Algeria,
7,2016.0,ALG,Athletics-800 metres,Silver,summer,Algeria,
