In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
import csv

import os
import glob

In [17]:
# Countries to process. Each entry is the base name of a per-country CSV
# file (it is interpolated into the file paths used by the cells below),
# so the spellings must match the file names exactly. No entry contains
# whitespace, which is what makes the whitespace split below safe.
country_names = """
    Bangladesh China Finland France Georgia India
    Indonesia Kuwait Netherlands Norway Philippines
    Portugal Qatar Russia SArabia Senegal Singapore
    Spain Sweden Switzerland Syria Turkiye UAE UK
    USA Armenia Australia Azerbaijan Bahrain Chile
    Chad Croatia Comoros Austria Argentina Israel
    Greenland Iceland Ireland Greece Japan Jordan Kazakhstan
    Kenya Kyrgyz Maldives Malta Mexico Nepal Oman Pakistan
    Panama Paraguay Peru Romania SanMarino Serbia Sudan Tanzania
    Tunisia Uganda Ukraine Uzbekistan Zimbabwe Belarus Bosnia
    Brazil Bulgaria BurkinaFaso Burundi Cameroon Cuba Cyprus Gabon
    Ghana
""".split()

## Cleaning

In [18]:
# Clean every raw per-country file and write the result to the
# "Cleaned Data" folder as <country>_cleaned.csv:
#   1. remove bracketed suffixes from the column headers,
#   2. drop the 'Country Name' and 'Series Code' columns,
#   3. round the values to 2 decimal places.
for country in country_names:
    df = pd.read_csv(f"Datas/Raw Data/Countries/{country}.csv")

    # Non-greedy match: removes each "[...]" group from a header.
    # NOTE(review): whitespace that preceded the bracket is left in place,
    # so headers may keep a trailing space - confirm this is acceptable.
    df.columns = df.columns.str.replace(r'\[.*?\]', '', regex=True)

    # Round directly on the new frame instead of assigning the rounded
    # values back through a whole-frame `iloc[:, :]` slice; the written
    # result is the same without the redundant in-place round trip.
    df_new = df.drop(columns=['Country Name', 'Series Code']).round(2)

    df_new.to_csv(f'Datas/Cleaned Data/Countries/{country}_cleaned.csv', index=False)

## Fixing

In [19]:
# Labels applied to row positions 0..6 of every cleaned file.
# NOTE(review): assumes each cleaned file lists its series in exactly this
# order - confirm against the raw exports.
indicator_labels = dict(enumerate([
    "GDP growth",
    "GDP per capita",
    "Population",
    "Population growth",
    "Inflation",
    "Unemployment",
    "Trade (% of GDP)",
]))

# For each country: label the rows, discard the 'Series Name' column,
# replace missing values with 0.0 and store the result as <country>_fixed.csv.
for country in country_names:
    fixed = (
        pd.read_csv(f'Datas/Cleaned Data/Countries/{country}_cleaned.csv')
        .rename(index=indicator_labels)
        .drop(columns="Series Name")
        .fillna(0.0)
    )
    # index=True keeps the row labels as the first column of the output file
    fixed.to_csv(f"Datas/Fixed Data/Countries/{country}_fixed.csv", index=True)

In [20]:
# Pass 1 - transpose: split each fixed file into consecutive chunks of 7 rows,
# flip every chunk so its rows become columns, and stack the flipped chunks.
# The file is written without a header row, so its first line is the first
# transposed row; pass 2 relies on read_csv treating that line as the header.
for country in country_names:
    fixed = pd.read_csv(f"Datas/Fixed Data/Countries/{country}_fixed.csv")

    position_in_chunk = fixed.index % 7   # 0..6 inside each chunk
    chunk_number = fixed.index // 7       # which chunk a row belongs to

    transposed_chunks = [
        chunk.T
        for _, chunk in fixed.set_index(position_in_chunk).groupby(chunk_number)
    ]
    stacked = pd.concat(transposed_chunks)

    stacked.reset_index().to_csv(
        f"Datas/Converted Data/Countries/{country}_converted.csv",
        index=False,
        header=False,
    )

# Pass 2 - tidy: reload each converted file, name the first column 'Year',
# remove the two growth columns and overwrite the file in place.
for country in country_names:
    converted_path = f"Datas/Converted Data/Countries/{country}_converted.csv"
    tidy = (
        pd.read_csv(converted_path)
        .rename(columns={'Unnamed: 0': 'Year'})
        .drop(columns=['GDP growth', 'Population growth'])
    )
    tidy.to_csv(converted_path, index=False)

# Prepare data for the model

In [9]:
# Folder that holds the per-country converted CSV files
folder_path = 'Datas/Converted Data/Countries'

# Collect the path of every CSV file in the folder. glob returns matches in
# arbitrary, filesystem-dependent order, so sort them to make the row order
# of the combined file reproducible across runs and machines.
all_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when the folder is empty or missing.
if not all_files:
    raise ValueError(
        f"No CSV files found in {folder_path!r}; run the conversion cells first."
    )

# Read the files lazily and stack them into a single DataFrame with a fresh
# 0..n-1 index
df_from_each_file = (pd.read_csv(f) for f in all_files)
concatenated_df   = pd.concat(df_from_each_file, ignore_index=True)

# Save the combined DataFrame
concatenated_df.to_csv("Datas/Model Data/final.csv", index=False)



In [11]:
# Load the combined dataset written by the previous step
df = pd.read_csv('Datas/Model Data/final.csv')

# True for every row that contains at least one missing value
rows_with_missing = df.isna().any(axis=1)

# Report whether anything is missing; when it is, show the affected rows
if not rows_with_missing.any():
    print("No NaN values")
else:
    print("There are NaN values")
    nan_values = df[rows_with_missing]
    print(nan_values)


No NaN values
