### Función de Formateo

In [8]:
import pandas as pd
import numpy as np


def to_database_format(data, column_name = "Confirmed"):
    """Reshape a wide time-series table into long ("database") format.

    The first four columns of ``data`` are treated as region identifiers and
    every remaining column as one date, whose header is the date label. The
    result has one row per (region, date) pair, ordered region by region.

    Rows are repeated by position rather than by index label, so ``data`` may
    carry any index (filtered, named, or with duplicate labels such as the
    one left behind by ``pd.concat``). The returned frame always has a fresh
    0..n-1 index.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide table: four identifier columns followed by one column per date.
    column_name : str, default "Confirmed"
        Name of the output column that receives the per-date values.

    Returns
    -------
    tuple of (pandas.DataFrame, str)
        The long-format frame (identifier columns, then ``"Date"`` holding the
        original column labels, then ``column_name``) and the most recent
        date found among the headers, formatted as ``YYYY-MM-DD``.
    """
    # Identifier columns (everything that is not a per-date value).
    id_columns = data.iloc[:, :4]

    # Per-date block: headers are the date labels, cells are the values.
    values_by_date = data.iloc[:, 4:]
    date_labels = values_by_date.columns.values

    # Most recent date present in the headers, as an ISO-formatted string.
    latest_date = str(pd.to_datetime(date_labels).max().date())

    num_dates = len(date_labels)
    num_regions = len(data)

    # Repeat every region row once per date. Positional selection keeps this
    # correct even when index labels are duplicated, and drop=True avoids
    # depending on the name of the column that reset_index would create.
    row_positions = np.repeat(np.arange(num_regions), num_dates)
    df_out = id_columns.iloc[row_positions].reset_index(drop = True)

    # Cycle through the full date sequence once per region.
    df_out["Date"] = list(date_labels) * num_regions

    # Row-major flattening yields region 0's dates, then region 1's, ...,
    # which matches the row order built above.
    df_out[column_name] = values_by_date.values.ravel()

    return df_out, latest_date

# Quick check of the helper on the confirmed-cases CSV: `coso` receives the
# long-format DataFrame and `fechas` the most recent date as a string
# (despite its plural name, `fechas` holds a single value).
confirmed = pd.read_csv("./Dataset/time_series_covid19_confirmed_global.csv")
coso, fechas = to_database_format(confirmed, "Confirmed")

In [9]:
# Most recent date among the confirmed-cases column headers
# (second value returned by to_database_format).
fechas

'2021-11-24'

### Conversión de Datos a Formato de BD

In [14]:
# Read the three wide-format CSV time series.
confirmed = pd.read_csv("./Dataset/time_series_covid19_confirmed_global.csv")
deaths = pd.read_csv("./Dataset/time_series_covid19_deaths_global.csv")
recovered = pd.read_csv("./Dataset/time_series_covid19_recovered_global.csv")

# Convert each table to long (database) format.
# to_database_format returns a (DataFrame, latest_date) tuple; only the
# DataFrame is kept, because the db_* names are later passed to pd.merge,
# which rejects tuples.
db_confirmed = to_database_format(confirmed, "Confirmed")[0]
db_deaths = to_database_format(deaths, "Deaths")[0]
db_recovered = to_database_format(recovered, "Recovered")[0]

In [None]:
# Reload and convert the confirmed-cases table.
# to_database_format returns a (DataFrame, latest_date) tuple; keep only the
# DataFrame so db_confirmed remains usable as a merge input.
confirmed = pd.read_csv("./Dataset/time_series_covid19_confirmed_global.csv")
db_confirmed = to_database_format(confirmed, "Confirmed")[0]

### Combinación de Datos

In [11]:
# Columns that identify one (region, date) observation in every table.
merge_keys = ["Province/State", "Country/Region", "Lat", "Long", "Date"]

# Left joins: every row of the confirmed table is kept, and rows without a
# match in the other table get missing values in the added column.
df = db_confirmed.merge(db_deaths, on = merge_keys, how = "left")
df_merged = df.merge(db_recovered, on = merge_keys, how = "left")

# Parse the date labels into datetime values.
df_merged["Date"] = pd.to_datetime(df_merged["Date"])

In [12]:
# Preview the merged table (confirmed, deaths and recovered per region and date).
df_merged

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.939110,67.709953,2020-01-22,0,0,0.0
1,,Afghanistan,33.939110,67.709953,2020-01-23,0,0,0.0
2,,Afghanistan,33.939110,67.709953,2020-01-24,0,0,0.0
3,,Afghanistan,33.939110,67.709953,2020-01-25,0,0,0.0
4,,Afghanistan,33.939110,67.709953,2020-01-26,0,0,0.0
...,...,...,...,...,...,...,...,...
188435,,Zimbabwe,-19.015438,29.154857,2021-11-20,133615,4699,0.0
188436,,Zimbabwe,-19.015438,29.154857,2021-11-21,133647,4699,0.0
188437,,Zimbabwe,-19.015438,29.154857,2021-11-22,133674,4699,0.0
188438,,Zimbabwe,-19.015438,29.154857,2021-11-23,133674,4699,0.0
