## Import libraries

In [1]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below; consider removing it while developing.
warnings.filterwarnings("ignore")

In [2]:
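# pickle is part of the Python standard library, so no install is needed.
# Only on Python 3.5-3.7, the pickle5 backport adds pickle protocol 5 support:
# !pip install pickle5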
# To install pandas:
# !pip install pandas
# To install numpy:
# !pip install numpy

In [3]:
# For pickle files:
import pickle
# For csv files
import pandas as pd
# For arrays
import numpy as np

In [4]:
# to install epiweeks in python:
# !pip install epiweeks

In [5]:
# For Epiweek:
from epiweeks import Week, Year
from datetime import date

# 1. Read pickle file

In [6]:
# Function to read file:
def read_pickle(file_path):
    """Load a pickled object from disk and print a short summary of it.

    Parameters
    ----------
    file_path : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        The object stored in the pickle, returned unchanged.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while unpickling, so only use
    this helper on files from a trusted source.
    """
    # The context manager closes the handle even when unpickling raises.
    with open(file_path, 'rb') as infile:
        file = pickle.load(infile)
    # Not every pickled object is an array; report None instead of failing
    # when the loaded object has no `shape` attribute.
    shape = getattr(file, 'shape', None)
    print(f'The file has a shape: {shape} and class: {type(file)}')
    return file

## Examples:

### Example 1

In [7]:
# Path to the pickle file (replace with your own path, as in the template below):
# file_path = 'path_to_pickle.pkl'
file_path = 'Embeddings/features_resnet50.pkl'

# Read the file (read_pickle also prints the shape and class of the loaded object):
file = read_pickle(file_path)

The file has a shape: (164, 100) and class: <class 'numpy.ndarray'>


### Example 2

In [8]:
# Path to the pickle file (replace with your own path, as in the template below):
# file_path = 'path_to_pickle.pkl'
file_path = 'Embeddings/features_transformer.pkl'

# Read the file (this rebinds `file`, replacing the previously loaded object):
file = read_pickle(file_path)

The file has a shape: (164, 100) and class: <class 'numpy.ndarray'>


## Numpy array to pandas dataframe:

In [9]:
# Function to split one column of features into multiple columns:
def split_columns(df):
    """Expand the ``'features'`` column into one column per feature value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Date'`` column and a ``'features'`` column whose
        cells each hold a sequence of values.

    Returns
    -------
    pandas.DataFrame
        ``'Date'`` followed by the expanded feature columns (named by
        position: 0, 1, 2, ...), with the same index as ``df``.
    """
    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a
    # fresh 0..n-1 index would misalign rows (and pad with NaN) whenever
    # `df` was filtered, sorted, or otherwise re-indexed.
    df_aux = pd.DataFrame(df['features'].tolist(), index=df.index)
    df_aux = pd.concat([df['Date'], df_aux], axis=1)
    return df_aux


# Function to genearte the dataframe from features and date arrays:
def generate_dataframe(features, dates):
    features_df = pd.DataFrame(columns=['Date', 'features'])
    for i in range(len(features)):
        image_features = features[i]
        date = dates[i]
        features_df = features_df.append({'Date': date, 'features': image_features}, ignore_index=True )
    # new df from the column of lists
    features_df = split_columns(features_df)
    return features_df

## Examples:

### Example 1:

In [10]:
# Read the dates:

# Path to the pickle file holding the dates
dates_path = 'Embeddings/dates.pkl'
# Read the file (read_pickle prints the shape and class of what it loaded):
dates = read_pickle(dates_path)

The file has a shape: (165,) and class: <class 'numpy.ndarray'>


In [11]:
# Read the features:

# Path to the pickle file:
file_path = 'Embeddings/features_resnet50.pkl'
# Read the file:
file = read_pickle(file_path)

The file has a shape: (164, 100) and class: <class 'numpy.ndarray'>


In [12]:
# Generate the dataframe.
# NOTE(review): the printed shapes show one more date than feature rows;
# generate_dataframe only reads as many dates as there are feature rows, so
# the last date is dropped silently - confirm the two files are aligned.
features_df = generate_dataframe(file, dates)

# Show the first rows of the features dataframe:
features_df.head()

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,2015-11-01,-0.086982,0.288081,-0.006342,0.208007,0.266013,-0.859696,0.074751,0.112848,-0.001884,...,0.262575,0.110694,-0.12636,-0.053719,-0.226204,-0.082629,-0.174591,-0.084881,-0.066012,0.071893
1,2015-11-08,0.04798,0.196725,0.042814,0.134543,0.370102,-0.990212,-0.021948,0.094639,0.055113,...,0.175898,0.260473,-0.164818,0.075305,-0.058506,0.035748,-0.139905,-0.09772,-0.018967,0.096694
2,2015-11-15,0.033984,0.103946,0.058474,0.127122,0.309244,-0.992553,-0.022212,0.147466,0.029719,...,0.124701,0.113317,-0.198573,-0.076236,-0.017778,-0.029593,-0.077974,0.015587,-0.033545,0.075111
3,2015-11-22,-0.048743,0.161994,-0.007947,0.187336,0.387438,-0.982925,0.004363,0.063091,0.083177,...,0.180346,0.149084,-0.086722,0.013918,-0.091514,0.082734,-0.102815,0.099621,-0.000714,0.047569
4,2015-11-29,-0.173518,0.013973,0.10072,0.073034,0.060532,-0.799869,0.141367,0.056267,-0.050752,...,0.032416,-0.235834,-0.217754,0.030347,-0.032033,0.027889,-0.310983,0.257912,0.004529,0.081347


In [13]:
# Save the dataframe as CSV; index=False leaves the row index out of the file.
features_df.to_csv('Embeddings/features_resnet50.csv', index=False)

### Example 2:

In [14]:
# Read the features:

# Path to the pickle file:
file_path = 'Embeddings/features_transformer.pkl'
# Read the file:
file = read_pickle(file_path)

The file has a shape: (164, 100) and class: <class 'numpy.ndarray'>


In [15]:
# Generate the dataframe (reuses the `dates` array loaded earlier):
features_df = generate_dataframe(file, dates)

# Show the first rows of the features dataframe:
features_df.head()

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,2015-11-01,0.387164,0.403747,0.008258,0.185946,-0.023333,0.175896,0.349484,0.33533,-0.110036,...,-0.001924,1.179506,0.642817,-0.005041,-0.533648,-0.636882,0.004286,0.052736,0.106446,-0.34541
1,2015-11-08,0.386666,0.403197,0.009119,0.186291,-0.022466,0.174298,0.349471,0.335451,-0.109947,...,-0.002548,1.178966,0.643421,-0.00444,-0.535207,-0.636864,0.004112,0.05082,0.106349,-0.345483
2,2015-11-15,0.386666,0.403197,0.009119,0.186291,-0.022466,0.174298,0.349471,0.335451,-0.109947,...,-0.002548,1.178966,0.643421,-0.00444,-0.535207,-0.636864,0.004112,0.05082,0.106349,-0.345483
3,2015-11-22,0.386666,0.403197,0.009119,0.186291,-0.022466,0.174298,0.349471,0.335451,-0.109947,...,-0.002548,1.178966,0.643421,-0.00444,-0.535207,-0.636864,0.004112,0.05082,0.106349,-0.345483
4,2015-11-29,0.386666,0.403197,0.009119,0.186291,-0.022466,0.174298,0.349471,0.335451,-0.109947,...,-0.002548,1.178966,0.643421,-0.00444,-0.535207,-0.636864,0.004112,0.05082,0.106349,-0.345483


In [16]:
# Save the dataframe as CSV; index=False leaves the row index out of the file.
features_df.to_csv('Embeddings/features_transformer.csv', index=False)

# 2. Read csv file

## Examples:

### Example 1:

In [13]:
# Path to the csv file (replace with your own path, as in the template below):
# file_path = 'path_to_csv.csv'
file_path = 'Embeddings/pca_medellin_100features.csv'

# Read the csv file into a dataframe
file = pd.read_csv(file_path)

# Show the dataframe:
# Number of rows to show:
rows = 5 
# Display the first `rows` rows:
file.head(rows)

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,2015-11-01,98.435754,108.962228,-40.915729,39.468939,47.325778,70.472168,187.431726,-229.80174,43.888213,...,0.587965,-0.365837,-2.923909,0.096117,-0.608161,0.119047,1.095555,-1.305447,-1.034465,-0.486601
1,2015-11-08,98.435754,108.962228,-40.915729,39.468939,47.325778,70.472168,187.431726,-229.80174,43.888213,...,0.587965,-0.365837,-2.923909,0.096117,-0.608161,0.119047,1.095555,-1.305447,-1.034465,-0.486601
2,2015-11-15,-149.578506,118.675591,-28.032221,-37.09669,-3.305299,-67.746083,37.584909,-11.838416,-15.613838,...,-13.746142,13.683615,-1.131025,-3.349845,-10.681103,-6.43745,12.024817,12.829025,12.699458,0.47561
3,2015-11-22,-149.578506,118.675591,-28.032221,-37.09669,-3.305299,-67.746083,37.584909,-11.838416,-15.613838,...,-13.746142,13.683615,-1.131025,-3.349845,-10.681103,-6.43745,12.024817,12.829025,12.699458,0.47561
4,2015-11-29,-28.099436,34.715737,47.153387,-50.761985,-63.137078,-58.088638,-6.508121,18.149721,-29.78131,...,-6.670381,-5.520467,-4.101447,1.232668,-2.903102,2.551955,-3.338012,-5.490846,-2.383766,-1.712792


### Example 2

In [14]:
# Path to the csv file (replace with your own path, as in the template below):
# file_path = 'path_to_csv.csv'
file_path = 'Embeddings/embeddings_medellin_100features.csv'

# Read the csv file into a dataframe
file = pd.read_csv(file_path)

# Show the dataframe:
# Number of rows to show:
rows = 5 
# Display the first `rows` rows:
file.head(rows)

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,2015-11-01,-0.773164,0.409429,-1.967424,-0.194265,1.589418,-0.806473,3.013131,-0.131107,-1.274766,...,1.858297,-1.143137,-0.012872,2.54545,-1.408913,2.835881,-4.650385,2.829885,-3.892387,-2.027068
1,2015-11-08,-0.375792,1.282191,-1.769311,-1.768404,0.960811,-1.401841,-0.138934,0.260522,-1.184208,...,1.427782,-2.927658,0.504012,-0.06258,3.175248,-1.147623,-2.653756,2.155063,-1.43332,-1.319588
2,2015-11-15,0.27006,1.896669,0.160804,0.100467,-1.59364,-0.673158,0.158351,1.709716,-0.97631,...,1.151502,-1.066319,0.433531,0.623669,1.678527,-2.044615,0.735826,1.30758,-0.059874,-0.276768
3,2015-11-22,-1.050474,-0.492467,-3.057118,-2.102062,0.290074,-1.837421,0.771637,0.883183,0.827531,...,1.586739,-0.414671,-0.480104,-1.629912,3.934243,2.386098,-1.041708,0.388762,0.855838,-1.768747
4,2015-11-29,0.277096,-0.177303,1.520242,1.147505,0.999619,0.343843,1.057434,1.925118,0.39596,...,1.060054,-0.819456,-0.657092,0.556108,3.519002,-1.769988,-2.901545,0.58261,3.461969,-1.540928


## CSV file to numpy array and dictionary:

In [15]:
# Function to generate:
# 1. Numpy array of features
# 2. Dictionary of key = date and value = features of the image on that date:
def get_embeddings_array(df):
    """Turn a features dataframe into a lookup dictionary and a feature matrix.

    The first column of ``df`` provides the dictionary keys; all remaining
    columns provide the feature values. A two-line summary is printed.

    Returns
    -------
    tuple
        ``(dictionary, array)`` where ``array`` holds every column after the
        first and ``dictionary`` maps each first-column value to its row of
        ``array``.
    """
    date_keys = np.array(df.iloc[:, 0])
    feature_matrix = np.array(df.iloc[:, 1:])

    # Pair each key with its row; a repeated key keeps the last row seen.
    features_by_date = dict(zip(date_keys, feature_matrix))

    print(f'Array of shape: {feature_matrix.shape} and class: {type(feature_matrix)} generated!')
    print(f'Dictionary of class: {type(features_by_date)}, with keys = dates and values = features generated!')
    return features_by_date, feature_matrix

## Examples:

### Example 1:

In [16]:
# Path to the csv file:
file_path = 'Embeddings/pca_medellin_100features.csv'
# Read the csv file
features_df = pd.read_csv(file_path)

# get_embeddings_array returns the dictionary first and the array second:
features_dictionary, features_array = get_embeddings_array(features_df)

Array of shape: (165, 100) and class: <class 'numpy.ndarray'> generated!
Dictionary of class: <class 'dict'>, with keys = dates and values = features generated!


### Example 2:

In [17]:
# Path to the csv file:
file_path = 'Embeddings/embeddings_medellin_200features.csv'
# Read the csv file
features_df = pd.read_csv(file_path)

# get_embeddings_array returns the dictionary first and the array second:
features_dictionary, features_array = get_embeddings_array(features_df)

Array of shape: (165, 200) and class: <class 'numpy.ndarray'> generated!
Dictionary of class: <class 'dict'>, with keys = dates and values = features generated!


# 3. Convert dates to epiweek

In [18]:
def getEpiweek(date_str):
    """Convert a dash-separated ``year-month-day`` string into an epiweek.

    The string is split on ``'-'``; the first three pieces are parsed as
    integers, turned into a ``datetime.date`` and handed to ``Week.fromdate``.
    """
    pieces = date_str.split('-')
    year = int(pieces[0])
    month = int(pieces[1])
    day = int(pieces[2])
    return Week.fromdate(date(year, month, day))

In [19]:
def dates_to_epiweek(dates_array):
    """Convert every date string in ``dates_array`` to an epiweek.

    Each element is passed through ``getEpiweek``; the results are collected
    into a numpy array, a one-line summary is printed, and the array is
    returned.
    """
    epiweeks_array = np.array([getEpiweek(day_str) for day_str in dates_array])
    print(f'The array with epiweeks of shape: {epiweeks_array.shape} and class: {type(epiweeks_array)} was generated!')
    return epiweeks_array

## Examples

### Example 1 
###### Part 1: Using the pickle file with dates to generate an epiweek array:

In [20]:
# Read the dates:

# Path to the pickle file holding the dates
dates_path = 'Embeddings/dates.pkl'
# Read the file:
dates = read_pickle(dates_path)

The file has a shape: (165,) and class: <class 'numpy.ndarray'>


In [21]:
# Convert the date strings to epiweeks (one epiweek per entry of `dates`):
epiweeks_array = dates_to_epiweek(dates)

The array with epiweeks of shape: (165,) and class: <class 'numpy.ndarray'> was generated!


### Example 1 

###### Part 2: Using the array of epiweeks created above to generate a dataframe with features and epiweeks:

In [22]:
# Read the features:

# Path to the pickle file:
file_path = 'Embeddings/features100_vae.pkl'
# Read the file:
file = read_pickle(file_path)

The file has a shape: (165, 100) and class: <class 'numpy.ndarray'>


In [23]:
# Generate the dataframe, passing the epiweeks in place of the date strings,
# so the 'Date' column of the result holds epiweeks:
features_df = generate_dataframe(file, epiweeks_array)

# Show the first rows of the features dataframe:
features_df.head()

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,201544,-0.773164,0.409429,-1.967424,-0.194265,1.589418,-0.806473,3.013131,-0.131107,-1.274766,...,1.858297,-1.143137,-0.012872,2.54545,-1.408913,2.835881,-4.650385,2.829885,-3.892387,-2.027068
1,201545,-0.375792,1.282191,-1.769311,-1.768404,0.960811,-1.401841,-0.138934,0.260522,-1.184208,...,1.427782,-2.927658,0.504012,-0.06258,3.175248,-1.147623,-2.653756,2.155063,-1.43332,-1.319588
2,201546,0.27006,1.896669,0.160804,0.100467,-1.59364,-0.673158,0.158351,1.709716,-0.97631,...,1.151502,-1.066319,0.433531,0.623669,1.678527,-2.044615,0.735826,1.30758,-0.059874,-0.276768
3,201547,-1.050474,-0.492467,-3.057118,-2.102062,0.290074,-1.837421,0.771637,0.883183,0.827531,...,1.586739,-0.414671,-0.480104,-1.629912,3.934243,2.386098,-1.041708,0.388762,0.855838,-1.768747
4,201548,0.277096,-0.177303,1.520242,1.147505,0.999619,0.343843,1.057434,1.925118,0.39596,...,1.060054,-0.819456,-0.657092,0.556108,3.519002,-1.769988,-2.901545,0.58261,3.461969,-1.540928


### Example 2 
###### Convert the column date from a csv file to epiweeks:

In [24]:
# Path to the csv file:
file_path = 'Embeddings/embeddings_medellin_100features.csv'

# Read the csv file into a dataframe
file = pd.read_csv(file_path)

# Show the dataframe before applying the conversion:
# Number of rows to show:
rows = 5 
# Display the first `rows` rows:
file.head(rows)

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,2015-11-01,-0.773164,0.409429,-1.967424,-0.194265,1.589418,-0.806473,3.013131,-0.131107,-1.274766,...,1.858297,-1.143137,-0.012872,2.54545,-1.408913,2.835881,-4.650385,2.829885,-3.892387,-2.027068
1,2015-11-08,-0.375792,1.282191,-1.769311,-1.768404,0.960811,-1.401841,-0.138934,0.260522,-1.184208,...,1.427782,-2.927658,0.504012,-0.06258,3.175248,-1.147623,-2.653756,2.155063,-1.43332,-1.319588
2,2015-11-15,0.27006,1.896669,0.160804,0.100467,-1.59364,-0.673158,0.158351,1.709716,-0.97631,...,1.151502,-1.066319,0.433531,0.623669,1.678527,-2.044615,0.735826,1.30758,-0.059874,-0.276768
3,2015-11-22,-1.050474,-0.492467,-3.057118,-2.102062,0.290074,-1.837421,0.771637,0.883183,0.827531,...,1.586739,-0.414671,-0.480104,-1.629912,3.934243,2.386098,-1.041708,0.388762,0.855838,-1.768747
4,2015-11-29,0.277096,-0.177303,1.520242,1.147505,0.999619,0.343843,1.057434,1.925118,0.39596,...,1.060054,-0.819456,-0.657092,0.556108,3.519002,-1.769988,-2.901545,0.58261,3.461969,-1.540928


In [25]:
# Replace each date string in the 'Date' column with its epiweek.
# NOTE(review): this overwrites the column in place, so re-running the cell
# feeds already-converted values to getEpiweek, which expects a
# 'year-month-day' string - presumably that fails; re-read the CSV first.
file['Date'] = file['Date'].apply(getEpiweek)

# Show the dataframe after the conversion:
# Number of rows to show:
rows = 5 
# Display the first `rows` rows:
file.head(rows)

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,201544,-0.773164,0.409429,-1.967424,-0.194265,1.589418,-0.806473,3.013131,-0.131107,-1.274766,...,1.858297,-1.143137,-0.012872,2.54545,-1.408913,2.835881,-4.650385,2.829885,-3.892387,-2.027068
1,201545,-0.375792,1.282191,-1.769311,-1.768404,0.960811,-1.401841,-0.138934,0.260522,-1.184208,...,1.427782,-2.927658,0.504012,-0.06258,3.175248,-1.147623,-2.653756,2.155063,-1.43332,-1.319588
2,201546,0.27006,1.896669,0.160804,0.100467,-1.59364,-0.673158,0.158351,1.709716,-0.97631,...,1.151502,-1.066319,0.433531,0.623669,1.678527,-2.044615,0.735826,1.30758,-0.059874,-0.276768
3,201547,-1.050474,-0.492467,-3.057118,-2.102062,0.290074,-1.837421,0.771637,0.883183,0.827531,...,1.586739,-0.414671,-0.480104,-1.629912,3.934243,2.386098,-1.041708,0.388762,0.855838,-1.768747
4,201548,0.277096,-0.177303,1.520242,1.147505,0.999619,0.343843,1.057434,1.925118,0.39596,...,1.060054,-0.819456,-0.657092,0.556108,3.519002,-1.769988,-2.901545,0.58261,3.461969,-1.540928
