In [187]:
import pandas as pd
import os

In [188]:
# Raw mobility frames, filled in by the loading cell and keyed by report name.
all_data = {}

# Identifier / location columns of a mobility report, parsed as text.
_TEXT_COLUMNS = [
    "country_region_code",
    "country_region",
    "sub_region_1",
    "sub_region_2",
    "metro_area",
    "iso_3166_2_code",
    "census_fips_code",
    "place_id",
]

# Mobility categories; each appears in the raw report with the suffix below.
_MOBILITY_CATEGORIES = [
    "retail_and_recreation",
    "grocery_and_pharmacy",
    "parks",
    "transit_stations",
    "workplaces",
    "residential",
]
_BASELINE_SUFFIX = "_percent_change_from_baseline"

# Column -> dtype mapping handed to pd.read_csv for every mobility report.
# "date" is deliberately left as a plain object column here.
dtype = {
    **dict.fromkeys(_TEXT_COLUMNS, str),
    "date": object,
    **{category + _BASELINE_SUFFIX: float for category in _MOBILITY_CATEGORIES},
}

# Country codes whose reports are excluded from the analysis.
deleted_countries = [
    # NOTE(review): the original note read "Letonya" (Latvia, ISO code LV),
    # but "LT" is the ISO code of Lithuania - confirm which one was intended.
    "LT",
    "LU",  # Luxembourg
    "MT",  # Malta
    "SI",  # Slovenia
    "SE",  # Sweden
]

# Every text column except the two country-level ones is removed from the final data set.
dropped_columns = [
    column for column in _TEXT_COLUMNS if not column.startswith("country_region")
]

# Strip the common suffix so the exported columns carry just the category name.
new_column_names = {
    category + _BASELINE_SUFFIX: category for category in _MOBILITY_CATEGORIES
}

# Collect the mobility report files to load.
# os.listdir returns every directory entry (hidden files, sub-folders, ...), and each
# name kept here is later passed to pd.read_csv, so only ".csv" entries are accepted.
files = os.listdir("assets")
countries = sorted(
    entry
    for entry in files
    # Characters 5-6 of the file name are compared against the excluded country codes.
    # Presumably names look like "2020_XX_Region_Mobility_Report.csv" - TODO confirm.
    if entry.lower().endswith(".csv") and entry[5:7] not in deleted_countries
)  # alphabetical order keeps the load order deterministic

# WHO columns that get attached to each country's mobility rows.
columns_will_get_from_who_data = ['New_cases', 'New_deaths']

# Load the WHO table and add a parsed "date" column derived from "Date_reported".
who_data = pd.read_csv("WHO-COVID-19-global-data.csv").assign(
    date=lambda who_frame: pd.to_datetime(who_frame['Date_reported'])
)

In [189]:
# Read every selected report into all_data, keyed by the file name up to its first dot.
all_data.update(
    (
        report_name.partition(".")[0],
        pd.read_csv("assets/{filename}".format(filename=report_name), dtype=dtype),
    )
    for report_name in countries
)

In [190]:
# Keep only the rows with no sub_region_1 value for each loaded report.
# The unused location columns stay in place here; they are dropped once all
# countries have been combined.
# NOTE(review): rows with an empty sub_region_1 are presumably the country-level
# aggregates - confirm no other row kind (e.g. metro_area rows) passes this filter.
clean_all_data = [
    report[report['sub_region_1'].isna()]
    for report in all_data.values()
]

In [191]:
# Attach the WHO columns to each country's mobility rows, matched by calendar date.
#
# The WHO values used to be pasted on with pd.concat(axis=1) as freshly indexed
# Series, which lines rows up by index label / position rather than by date: one
# missing day on either side shifted every later value, and a non-zero-based
# mobility index produced extra NaN rows. Looking each value up by date avoids that
# and leaves the mobility frame's own index and "date" column untouched.
time_edited_data = []
for country_frame in clean_all_data:
    if country_frame.empty:
        # No rows means no country code to look up (iloc[0] would raise) and nothing to add.
        continue
    country_code = country_frame['country_region_code'].iloc[0]

    # WHO rows of this country, indexed by date; duplicate dates would make the
    # lookup ambiguous, so only the first row per date is kept.
    who_by_date = (
        who_data.loc[
            who_data['Country_code'] == country_code,
            ['date', *columns_will_get_from_who_data],
        ]
        .drop_duplicates(subset='date')
        .set_index('date')
    )

    # The mobility "date" column is read as text; parse it only for the lookup.
    report_dates = pd.to_datetime(country_frame['date'])

    # Dates without a WHO entry yield NaN instead of borrowing a neighbouring day's value.
    who_columns = {
        column: report_dates.map(who_by_date[column])
        for column in columns_will_get_from_who_data
    }
    time_edited_data.append(country_frame.assign(**who_columns))

In [192]:
# Stack all countries, remove the unused location columns, shorten the mobility
# column names, and export the result.
combined_data = (
    pd.concat(time_edited_data)
    .drop(columns=dropped_columns)
    .rename(columns=new_column_names)
)
combined_data.to_csv("data.csv")