In [None]:
%reset

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the wide monthly table; the 'date' column is parsed as datetimes.
data = pd.read_csv(
    'bic_monthly_dollars_sellout.csv',
    parse_dates=['date'],
)

In [None]:
# Preview the first rows of the loaded table as a quick sanity check.
data.head()

In [None]:
# List every column label of the loaded table, one per line.
for column_label in data.columns.tolist():
    print(column_label)

In [None]:
# Reference note (shown as this cell's output): the series handled per country
# in the cells below. EMP presumably refers to the 'UNEMP RATE' columns, which
# are renamed to 'emp' further down -- TODO confirm.
'''
US STA SHA BCI CLI EMP
MX STA SHA BCI EMP
BR STA SHA BCI CLI
AR STA SHA
'''

In [None]:
def reshape_data(df, selected_columns, my_id_var, series_id, column_name):
    '''
    Subset a wide table to the selected columns and melt it to long format.

    1) subset the dataset to `selected_columns`
    2) melt the subset so that every non-id column becomes rows
    3) build a label by:
        a) taking the subset's column names, excluding the id column(s)
        b) keeping the first whitespace-separated token of each name
           (the country code in names such as 'AR STA'), without duplicates
        c) joining those tokens and the fixed suffix 'sellout_dollars' with '_'
    4) attach the label as the `name` attribute and return the reshaped data

    No file is written here; callers save the returned frame themselves.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table holding the id column(s) and one column per series.
    selected_columns : list
        Columns to keep: the id column(s) plus the series to reshape. The id
        column(s) may appear at any position.
    my_id_var : str or list of str
        Column(s) passed to ``melt`` as ``id_vars``.
    series_id : str
        Name of the output column that receives the original column names.
    column_name : str
        Name of the output column that receives the values.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the id column(s), `series_id` and `column_name`.
        Its `name` attribute is e.g. 'AR_sellout_dollars'.

    Raises
    ------
    KeyError
        If a name in `selected_columns` or `my_id_var` is missing (raised by pandas).
    '''
    # 1) first select the columns from the dataset
    data_subset = df[selected_columns]

    # 2) reshape the subset data
    new_df = data_subset.melt(id_vars=my_id_var,
                              var_name=series_id,
                              value_name=column_name)

    # 3) create the label from the value columns only; identify them by
    #    excluding the id column(s) rather than assuming the id column is last
    if my_id_var is None:
        id_columns = []
    elif isinstance(my_id_var, str):
        id_columns = [my_id_var]
    else:
        id_columns = list(my_id_var)

    country_codes = []
    for column in data_subset.columns:
        if column in id_columns:
            continue
        tokens = str(column).split()
        # keep first-seen order and skip repeats so the label is deterministic
        if tokens and tokens[0] not in country_codes:
            country_codes.append(tokens[0])

    # 4) naming the dataframe; this is a plain Python attribute on the returned
    #    object (not a column), so it is not carried over to derived frames
    new_df.name = '_'.join(country_codes + ['sellout_dollars'])

    return new_df

In [None]:
# Argentina: the series columns to reshape, plus the 'date' id column.
selected_columns_AR = ['AR STA', 'AR SHA'] + ['date']

# Melt settings handed to reshape_data().
my_id_var, series_id, column_name = ['date'], 'series_id', 'sellout_dollars'

In [None]:
# Long-format Argentina series: one row per (date, series) pair.
AR_data = reshape_data(data, selected_columns_AR, my_id_var, series_id, column_name)

In [None]:
# Brazil: the series columns to reshape, plus the 'date' id column.
selected_columns_BR = ['BR STA', 'BR SHA'] + ['date']

# Melt settings handed to reshape_data().
my_id_var, series_id, column_name = ['date'], 'series_id', 'sellout_dollars'

In [None]:
# Long-format Brazil series: one row per (date, series) pair.
BR_data = reshape_data(data, selected_columns_BR, my_id_var, series_id, column_name)

In [None]:
# Mexico: the series columns to reshape, plus the 'date' id column.
selected_columns_MX = ['MX STA', 'MX SHA'] + ['date']

# Melt settings handed to reshape_data().
my_id_var, series_id, column_name = ['date'], 'series_id', 'sellout_dollars'

In [None]:
# Long-format Mexico series: one row per (date, series) pair.
MX_data = reshape_data(data, selected_columns_MX, my_id_var, series_id, column_name)

In [None]:
# United States: the series columns to reshape, plus the 'date' id column.
selected_columns_US = ['US STA', 'US SHA'] + ['date']

# Melt settings handed to reshape_data().
my_id_var, series_id, column_name = ['date'], 'series_id', 'sellout_dollars'

In [None]:
# Long-format US series: one row per (date, series) pair.
US_data = reshape_data(data, selected_columns_US, my_id_var, series_id, column_name)

In [None]:
# Distinct series labels present in the reshaped US data.
US_data['series_id'].unique().tolist()

In [None]:
# Attach the US indicator columns to every long-format row by date, give them
# country-neutral names ('emp' holds the 'US UNEMP RATE' column), then save.
US_column_to_be_added = data.loc[:, ['US BCI', 'US CLI', 'US UNEMP RATE', 'date']]
US_merged_data = (
    US_data
    .merge(US_column_to_be_added, how='left', on='date')
    .rename(columns={'US BCI': 'BCI', 'US CLI': 'CLI', 'US UNEMP RATE': 'emp'})
)
US_merged_data.to_csv('BIC_US_sellout_dollars.csv', index=False, encoding='utf8')

In [None]:
# Attach the Mexico indicator columns to every long-format row by date, give
# them country-neutral names ('emp' holds the 'MX UNEMP RATE' column), then save.
MX_column_to_be_added = data.loc[:, ['MX BCI', 'MX UNEMP RATE', 'date']]
MX_merged_data = (
    MX_data
    .merge(MX_column_to_be_added, how='left', on='date')
    .rename(columns={'MX BCI': 'BCI', 'MX UNEMP RATE': 'emp'})
)
MX_merged_data.to_csv('BIC_MX_sellout_dollars.csv', index=False, encoding='utf8')

In [None]:
# Attach the Brazil indicator columns to every long-format row by date, give
# them country-neutral names, then save.
BR_column_to_be_added = data.loc[:, ['BR BCI', 'BR CLI', 'date']]
BR_merged_data = (
    BR_data
    .merge(BR_column_to_be_added, how='left', on='date')
    .rename(columns={'BR BCI': 'BCI', 'BR CLI': 'CLI'})
)
BR_merged_data.to_csv('BIC_BR_sellout_dollars.csv', index=False, encoding='utf8')

In [None]:
# Argentina has no indicator columns to merge, so the reshaped frame is saved as-is.
AR_data.to_csv(
    'BIC_AR_sellout_dollars.csv',
    index=False,
    encoding='utf8',
)

In [None]:
# US and MX data

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# row-wise in the same way and ignore_index=True renumbers the rows 0..n-1.
# NOTE(review): this stacks the un-merged US_data with MX_merged_data, so the
# US rows get NaN in the 'BCI' and 'emp' columns -- confirm whether
# US_merged_data was intended here.
US_MX_data = pd.concat([US_data, MX_merged_data], ignore_index=True)

US_MX_data.shape

In [None]:
# AR and BR data

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# row-wise in the same way and ignore_index=True renumbers the rows 0..n-1.
AR_BR_data = pd.concat([AR_data, BR_data], ignore_index=True)

# Show the shape of the frame built in this cell (the original displayed
# US_MX_data.shape here, a copy-paste slip).
AR_BR_data.shape

In [None]:
# Display the stacked AR + BR frame for a visual check.
AR_BR_data

In [None]:
# Save the two stacked tables (UTF-8, without the index column).
US_MX_data.to_csv(
    'US_MX_sellout_dollars_data_BIC.csv',
    index=False,
    encoding='utf8',
)
AR_BR_data.to_csv(
    'AR_BR_sellout_dollars_data_BIC.csv',
    index=False,
    encoding='utf8',
)

In [None]:
# Names of every DataFrame currently bound in the notebook namespace.
# Objects are looked up in globals() instead of being eval()'d by name, and
# sorted() keeps the alphabetical order that dir() produced.
alldfs = [name for name in sorted(globals())
          if isinstance(globals()[name], pd.DataFrame)]

print(alldfs)


In [None]:
# Capture only the first group for inspection. The key is bound to
# `first_series_id` so the loop does not overwrite the notebook-level
# `series_id` setting that the reshape_data() calls pass as `var_name`.
for first_series_id, seriesdata in US_data.groupby('series_id'):
    break

In [None]:
# Inspect the group captured by the loop in the previous cell
# (the first group yielded by the groupby).
seriesdata

In [None]:
# Fetch the rows of a single series directly by its group key.
US_data.groupby('series_id').get_group('US STA')