In [1]:
import pandas as pd
import numpy as np
import os
from os.path import exists
import opendatasets as od
import shutil
import wbgapi as wb

import pymc as pm
import pytensor
import pytensor.tensor as pt
import matplotlib.pyplot as plt
import arviz as az

import requests
from io import BytesIO

# Notebook-wide matplotlib defaults: wide figures on a white background,
# a thin dashed grid, and all four axis spines switched off.
config = {
    'figure.figsize': (14, 4),
    'figure.constrained_layout.use': True,
    'figure.facecolor': 'w',
    'axes.grid': True,
    'grid.linewidth': 0.5,
    'grid.linestyle': '--',
    # One rcParam per spine, generated instead of spelled out four times.
    **{f'axes.spines.{side}': False for side in ('top', 'bottom', 'left', 'right')},
}
plt.rcParams.update(config)

# Deterministic seed: the sum of the character codes of the project tag.
# It feeds the NumPy generator shared by the notebook.
SEED = sum(ord(char) for char in 'olympics_bayes')
rng = np.random.default_rng(SEED)

Data needed:

MVP:
- Olympics results
- Host country dummy
- Has won a medal before
- GDP per capita
- Population
- Freedom index

Extension:
- Latitude of largest city
- Export volume
- Import volume
- Area
- Migration rate
- Doctors per capita
- Gender equality <-- very promising

# Download Data

In [2]:
# Create the raw and clean data folders the notebook writes into.
# The path is interpolated through a variable so the f-string does not nest
# double quotes inside a double-quoted literal (a SyntaxError before Python 3.12).
for data_dir in ("../Data/raw", "../Data/clean"):
    try:
        os.makedirs(data_dir, exist_ok=True)
        print(f"Ensured the destination folder '{data_dir}' exists.")
    except OSError as e:
        # Best-effort: report the filesystem problem and carry on with the next folder.
        print(f"Error creating destination folder: {e}")

Ensured the destination folder '../Data/raw' exists.
Ensured the destination folder '../Data/clean' exists.


## Olympic Data

### Olympic Results and Host Countries

In [9]:
# Fetch the Kaggle Olympic dataset with opendatasets. Per the recorded output,
# the archive lands in a folder named after the dataset inside the current
# working directory.
olympics_dataset_url = "https://www.kaggle.com/datasets/piterfm/olympic-games-medals-19862018"
od.download(olympics_dataset_url)

Dataset URL: https://www.kaggle.com/datasets/piterfm/olympic-games-medals-19862018
Downloading olympic-games-medals-19862018.zip to ./olympic-games-medals-19862018


100%|██████████████████████████████████████| 13.9M/13.9M [00:00<00:00, 21.3MB/s]





In [10]:
# Relocate the downloaded Kaggle files into the shared raw-data folder.
# The download folder is addressed relative to the current working directory
# (where the download cell put it) instead of through "../Notebook/...", which
# only resolved when the notebook's folder happened to be named "Notebook".
raw_dir = "../Data/raw"
download_dir = os.path.join(os.curdir, "olympic-games-medals-19862018")

try:
    os.makedirs(raw_dir, exist_ok=True)
    print(f"Ensured the destination folder '{raw_dir}' exists.")
except OSError as e:
    print(f"Error creating destination folder: {e}")

try:
    for filename in os.listdir(download_dir):
        source_file = os.path.join(download_dir, filename)
        destination_file = os.path.join(raw_dir, filename)

        if os.path.isfile(source_file):  # Only move files, not directories
            shutil.move(source_file, destination_file)
            print(f"Moved {filename} to {raw_dir}")
        else:
            print(f"Skipping directory {filename}")

    # os.rmdir only removes an empty folder; if a sub-directory was skipped
    # above, this raises and is reported by the handler below.
    os.rmdir(download_dir)
    print(f"Deleted the folder {download_dir}")
except OSError as e:
    # Filesystem failures (missing folder, permissions, non-empty folder) are
    # reported rather than raised, keeping the cell best-effort.
    print(f"Error: {e}")

Ensured the destination folder '../Data/raw' exists.
Moved olympic_medals.csv to ../Data/raw
Moved olympic_results.pkl to ../Data/raw
Moved olympic_hosts.csv to ../Data/raw
Moved olympic_athletes.csv to ../Data/raw
Moved olympic_results.csv to ../Data/raw
Deleted the folder ../Notebook/olympic-games-medals-19862018


## Independent Vars

### GDP

In [8]:
# Pull the World Bank series NY.GDP.MKTP.CD through wbgapi and store it as a raw CSV.
# NOTE(review): the data wish-list mentions GDP *per capita*; this indicator code
# looks like total GDP. Confirm whether per-capita is derived later.
gdp_frame = wb.data.DataFrame(['NY.GDP.MKTP.CD'])
gdp_frame.to_csv('../Data/raw/NY.GDP.MKTP.CD.csv')

### Population

In [6]:
# Pull the World Bank series SP.POP.TOTL through wbgapi and store it as a raw CSV.
population_frame = wb.data.DataFrame(['SP.POP.TOTL'])
population_frame.to_csv('../Data/raw/SP_POP_TOTL.csv')

### Freedom Index

In [7]:
# Download the Freedom House ratings workbook and file it under the raw-data folder.
fiw_filename = "FIW_1973-2024.xls"
raw_dir = "../Data/raw"

resp = requests.get(
    "https://freedomhouse.org/sites/default/files/2024-02/Country_and_Territory_Ratings_and_Statuses_FIW_1973-2024.xlsx",
    timeout=60,  # do not hang the kernel forever on a stalled connection
)
# Fail loudly on an HTTP error instead of saving an error page as the workbook.
resp.raise_for_status()

# NOTE(review): the URL serves an .xlsx file but it is stored with an .xls suffix.
# The name is kept because other cells may read this exact path.
with open(fiw_filename, 'wb') as output:
    output.write(resp.content)

# The file was just written to the current working directory, so it is addressed
# relative to it rather than through a hard-coded "../Notebook/" prefix.
source_file = fiw_filename
destination_file = os.path.join(raw_dir, fiw_filename)

if os.path.isfile(source_file):  # Only move files, not directories
    shutil.move(source_file, destination_file)
    print(f"Moved {fiw_filename} to {raw_dir}")
else:
    # The original message interpolated `filename`, a variable that only exists
    # as a leftover from another cell's loop (NameError on a fresh kernel).
    print(f"Skipping {source_file}: not a regular file")

Moved FIW_1973-2024.xls to ../Data/raw
