In [1]:
import tradingeconomics as te
import datetime
from datetime import datetime, date
from dateutil.relativedelta import relativedelta
import time
import pandas as pd
import numpy as np
import os
from os import listdir
import requests
from joblib import Parallel, delayed
from tqdm import tqdm
from functools import reduce
from pathlib import Path

In [2]:
class TradingEconomics:
    
    def __init__(self,
                 directory_path = None,
                 sub_file_storing_folder = None,
                 main_file_storing_folder = None,
                 today_date = None,
                 last_refresh_date = None,
                 from_scratch = None,
                 exisiting_data = None,
                 countries = None,
                 copy_path_final_file = None
                ):  
        """
        Author: Zung-Ru Lin
        2023
        
        :param directory_path: default working directory that should have apikey.txt
        :param sub_file_storing_folder: store all the files except the last master file
        :param main_file_storing_folder: store only the master file
        :param today_date: today's date
        :param last_refresh_date: earliest date of the rows
        :param from_scratch: automatically set last_refresh_date to 2005-1-1
        :param exisiting_data: path to existing table 
        :param countries: countries to be included
        :param copy_path_final_file: save a copy of final file to the path 
        
        """
        
        # Initialize class variables  
        self.dir = directory_path
        os.chdir(self.dir)
        self.sub_folder = sub_file_storing_folder
        self.main_folder = main_file_storing_folder
        self.countries = countries
        self.today_date = today_date
        self.cores = int(os.cpu_count())
        self.copy_path_final_file = copy_path_final_file
        
        
        # Raise error if key variables weren't properly assigned
            
        if self.sub_folder is None or not os.path.isdir(self.sub_folder):
            try:
                os.makedirs(self.sub_folder)
            except Exception as err:
                raise ValueError(f'Error creating folder for sub files: {self.sub_folder}. Exception: {err}')

        if self.main_folder is None or not os.path.isdir(self.main_folder):
            try:
                os.makedirs(self.main_folder)
            except Exception as err:
                raise ValueError(f'Error creating folder for main files: {self.main_folder}. Exception: {err}')

        if self.countries == None or self.countries == []:
            raise ValueError('Invalid or no argument assigned- countries: please list all countries to be updated')
        else:
            print("Countries for update:\n" + "\n".join([f"- {item}" for item in self.countries]))
            
        if not os.path.isfile(self.dir + 'apikey.txt'):
            raise FileNotFoundError(f'File "apikey.txt" is not present in the assigned directory {self.dir} ')
            
        
        # Login to TE with API Key
        if os.path.exists('apikey.txt'):
            with open('apikey.txt', 'r') as apikeyfile:
                apikey = apikeyfile.read()
        else:
            raise FileNotFoundError(f'apikey.txt not found in the assigned directory  {self.dir} ')

        te.login(apikey)
            

        # If not collect TE data from scratch, check if existing file is assigned
        if from_scratch == 1:
            self.exisiting_data = pd.DataFrame()
            self.last_refresh_date = '2005-01-01'
            
        else:
            if not os.path.exists(exisiting_data):
                raise FileNotFoundError(f'File: {exisiting_data} does not exist in the assigned folder {self.dir}.\n Look for final_historical_te_data_(year)-(month)-(day).csv in working directory')
            else:
                self.exisiting_data = pd.read_csv(exisiting_data, index_col = 0)    
            if last_refresh_date is None:
                self.last_refresh_date = self.exisiting_data[(self.exisiting_data['Country'] == 'United States') & (pd.notnull(self.exisiting_data['currency']))]['Date'].max() + '-01'
            else:
                self.last_refresh_date = last_refresh_date
                
                
    def divide_list_into_chunks(self, input_list, chunk_size):
        
        for i in range(0, len(input_list), chunk_size):
            yield input_list[i:i + chunk_size]
        
        
    def get_all_country_indicators(self):
        
        print(f'\n(1/6) Getting all country-indicator combinations.')
        
        # Check whether today's gdp_df has been created
        gdp_filename = os.path.join(self.sub_folder, f'gdp_df_{self.today_date}.csv')
        if not any(f.endswith(f'gdp_df_{self.today_date}.csv') for f in os.listdir(self.sub_folder)):
            gdp_df = te.getIndicatorData(indicators='gdp', country='all', output_type='df')
        else:
            print(f'Reading gdp_df_{self.today_date}.csv that was already created.')
            gdp_df = pd.read_csv(gdp_filename)
            
        self.all_country_indicators = pd.DataFrame()
        exceptions_df = pd.DataFrame()
        self.unique_country_list = list(set(self.countries)) 
        filename = self.sub_folder + '/all_country_indicators_' + self.today_date + '.csv'
        
        # Generate all_country_indicators table or read from an existing one if it has been previously created
        country_collected = set()
        country_left = list(set(self.countries)-country_collected)
        
        # Generate all_country_indicators table or read from an existing one if it has been previously created

        if not any(f.endswith(f'all_country_indicators_{self.today_date}.csv') for f in os.listdir(self.sub_folder)):
            
            rerun_time = 3
            while len(country_left)>0 and rerun_time>0:
                
                for ctry in tqdm(country_left):
                    retries = 5  # maximum number of retries
                    while retries > 0:
                        try:
                            
                            temp = te.getIndicatorData(country=[str(ctry)], output_type='df')
                            self.all_country_indicators = self.all_country_indicators.append(temp)

                            try:
                                if len(temp)>0:
                                    # print(ctry)
                                    country_collected.add(ctry)
                                    time.sleep(1)
    
                            except:
                                print(ctry, ' encountered HTTP error')
                                pass
                            time.sleep(1)
                            break  # exit the loop if successful
                        
                        except Exception as err:
                            print(6-retries, ' tries failed')
                            error_temp = pd.DataFrame(columns=['Country', 'Error Code', 'Date'])
                            error_temp['Country'] = ctry
                            error_temp['Error Code'] = err.code
                            error_temp['Date'] = date.today()
                            exceptions_df = exceptions_df.append(error_temp)
                            retries -= 1  # decrease the number of retries
                            if retries == 0:
                                print('All trials failed: ',err)  # if no more retries, raise the error
                            time.sleep(5)
                            pass
                    time.sleep(1)
                country_left = list(set(self.countries)-country_collected)
                
                if len(country_left)>0:
                    print(f'{len(country_left)} countries left: Rerun {country_left}')
                    rerun_time -= 1
                    if rerun_time == 0:
                        print(f'{country_left} has no response from the API')
                else:
                    print('Done reading all countries!')

            self.all_country_indicators.to_csv(filename, header = True)
        else:
            print(f'reading all_country_indicators_{self.today_date}.csv that was already created.')
            self.all_country_indicators = pd.read_csv(filename, index_col=0)        
            
        # Process columns of all_country_indicators
        self.all_country_indicators = self.all_country_indicators[self.all_country_indicators['Country'].isin(self.countries)]      
        self.unique_indicators = sorted(list(self.all_country_indicators['Category'].unique()))
        self.all_country_indicators['Category'] = [x.strip() for x in self.all_country_indicators['Category']]
        self.unique_combos = self.all_country_indicators[['Country', 'Category']].drop_duplicates()
        
        # print(self.unique_combos[self.unique_combos['Country']=='El Salvador']['Category'].unique())

        
        self.hist_start = datetime.strptime('2005-01-01', '%Y-%m-%d')
        self.hist_end = datetime.strptime(self.last_refresh_date, '%Y-%m-%d')
        
        
    def get_indicator_frequency(self):
        
        print("\n(2/6) Building frequency table for indicators.")
        
        n_per_chunk = 16
        self.frequency = pd.DataFrame()
        csv_file_name = f'{self.sub_folder}/frequency_combinations_{self.today_date}.csv'
        
        # Generate frequency_combinations table or read from an existing one if it has been previously created
        if not any(f.endswith(f'frequency_combinations_{self.today_date}.csv') for f in os.listdir(self.sub_folder)):
            for indicator in tqdm(self.unique_combos['Category'].unique()):
                relevant_countries_this_indicator = self.unique_combos[self.unique_combos['Category'] == indicator]['Country'].unique()
                for countries_chunk in self.divide_list_into_chunks(relevant_countries_this_indicator, n_per_chunk):
                    mydata = te.getIndicatorData(country=list(countries_chunk), indicators=[indicator], output_type='df')
                    self.frequency = pd.concat([self.frequency, mydata], axis=0)
                    time.sleep(1)
            self.frequency.to_csv(csv_file_name)
        else:
            print(f'reading frequency_combinations_{self.today_date}.csv that was already created.')
            self.frequency = pd.read_csv(csv_file_name, index_col=0)
            
        # Process columns of frequency_combinations
        self.frequency = self.frequency[['Country', 'Category', 'Frequency']]
        self.frequency_upper = self.frequency.copy()
        self.frequency['Category'] = [x.lower().strip() for x in self.frequency['Category']]
        self.frequency_upper['Category'] = [x.strip() for x in self.frequency_upper['Category']]
        
        
    def get_TE_data(self, initDate = None, endDate = None):
        
        print("\n(3/6) Pulling Trading Economic data.")

        initDate = self.last_refresh_date if initDate is None else initDate
        endDate = self.today_date if endDate is None else endDate
              
        n_per_chunk = 19
        missing_combos = []
        
        countries = list(self.all_country_indicators['Country'].unique())    
        self.unique_indicators = list(self.all_country_indicators['Category'].unique())
        self.unique_indicators.sort()
        
        # Pull trading economic data within a date range by category and country
        for indicator in tqdm(self.unique_combos['Category'].unique()):
            relevant_countries_this_indicator = self.unique_combos[self.unique_combos['Category'] == indicator]['Country'].unique()
            for countries_chunk in self.divide_list_into_chunks(relevant_countries_this_indicator, n_per_chunk):

                countries_this_chunk = [x for x in self.divide_list_into_chunks(relevant_countries_this_indicator, n_per_chunk)]
                country_hypothetical_filenames = [f'historical_data_{self.last_refresh_date}_til_{self.today_date}_{indicator}_{country}.csv' for country in countries_this_chunk]
                country_hypothetical_filenames = [item for sublist in country_hypothetical_filenames for item in sublist]
                today_pull_file_stem = f'historical_data_{self.last_refresh_date}_til_{self.today_date}'
                existing_filenames = [f for f in os.listdir(f"{self.dir}{self.sub_folder}") if (f.startswith(today_pull_file_stem) and indicator in f)]
                missing_filenames_for_this_chunk = [x for x in country_hypothetical_filenames if x not in existing_filenames]

                max_tries = 5
                tries = 0
                mydata = None

                

                if len(missing_filenames_for_this_chunk) > 0:
                    while tries < max_tries and np.all(pd.isnull(mydata)):

                        tries += 1
                        mydata = te.getHistoricalData(country=countries_chunk, indicator=[indicator], initDate=initDate, output_type= 'df')
                        time.sleep(1)

                        if tries >= 2 and np.all(pd.isnull(mydata)):
                            print(f"{tries} tries failed for indicator: {indicator}")

                    if not np.all(pd.isnull(mydata)):
                        
                        for country in countries_chunk:
                            country_indicator_file_name = f"{self.sub_folder}/historical_data_{self.last_refresh_date}_til_{self.today_date}_{indicator}_{country}.csv"

                            if isinstance(mydata, dict):
                                try:
                                    if not np.all(pd.isnull(mydata[country])):
                                        for a in mydata[country][indicator][0]:
                                            if not np.all(pd.isnull(a)):
                                                dfa = pd.DataFrame(a)
                                                dfa.insert(0, 'country', country)
                                                dfa.insert(1, 'indicator', indicator)
                                    dfa.to_csv(country_indicator_file_name)
                                
                                except Exception as err:
                                    print(err)
                                    pass
                            
                            else:
                                dfb = mydata.loc[(mydata['Country'] == country) & (mydata['Category'] == indicator)].copy()
                                dfb = dfb[['Country', 'Category', 'DateTime', 'Value']]
                                dfb.columns = ['country', 'indicator', 'date', '0']
                                dfb['date'] = pd.to_datetime(dfb['date'])
                                dfb = dfb.set_index('date')

                                # sort values by date
                                dfb = dfb.sort_values('date')
                                dfb.index.name = None
                                dfb.to_csv(country_indicator_file_name)

                    else:
                        for country in countries_chunk:
#                             print(f'Pulled data is null for {indicator} and {country}')
                            missing_combos.append((country, indicator))
                    time.sleep(1)        
                        
                
                    
        for x in missing_combos:
            self.all_country_indicators = self.all_country_indicators[~((self.all_country_indicators['Country'] == x[0]) & (self.all_country_indicators['Category'] == x[1]))]
            filename = f"{self.sub_folder}/all_country_indicators_{self.today_date}.csv"
            self.all_country_indicators.to_csv(filename, header=True)

            self.unique_combos = self.unique_combos[~((self.unique_combos['Country'] == x[0]) & (self.unique_combos['Category'] == x[1]))]
        

    
            
    def get_monthly_date(self, date):
        
        if isinstance(date, str):
            return date[:7]
        return date

    
    def process_indicators(self, indicators, submonthly_combos, aggregation_method):
        
        print(f'Taking only the {aggregation_method} value of a month for {len(indicators)} indicators.')
        
        result_df = pd.DataFrame()
        
        # Value selection of submonthly data
        for indicator in tqdm(indicators):
            possible_countries = submonthly_combos[submonthly_combos['Category'] == indicator]

            for country in possible_countries['Country'].unique():
                filename = f"{self.sub_folder}/historical_data_{self.last_refresh_date}_til_{self.today_date}_{indicator}_{country}.csv"

                try:
                    country_df = pd.read_csv(filename)
                    country_df.rename(columns={'Unnamed: 0': 'Date', 'country': 'Country', 'indicator': 'Category', '0': 'Value'}, inplace=True)
                    country_df['Date'] = [datetime.strftime(d, '%Y-%m') for d in pd.to_datetime(country_df['Date'])]
                    country_df['Category'] = [x.lower() for x in country_df['Category']]

                    for month in country_df['Date'].unique():
                        monthly_row = pd.DataFrame()
                        monthly_row['Country'] = country_df['Country'].unique()
                        monthly_row['Category'] = country_df['Category'].unique()
                        monthly_row['Date'] = country_df[country_df['Date'] == month]['Date'].unique()
                        monthly_row['Value'] = country_df[(country_df['Date'] == month)]['Value'].agg(aggregation_method)
    
                        result_df = pd.concat([result_df, monthly_row], axis=0)

                except Exception as err:
                    print(err)
                    pass

        return result_df.reset_index(drop=True)
    
    
    def tall_master_file(self, initDate=None, endDate=None):
        
        print('\n(4/6) Building tall master file.')
        
        initDate = self.last_refresh_date if initDate is None else initDate
        endDate = self.today_date if endDate is None else endDate
        
        #
        countries = list(self.all_country_indicators['Country'].unique())
        
        # filepaths = [f"{self.sub_folder}/{f}" for f in listdir(self.sub_folder) if f.endswith(".csv") and endDate in f and initDate in f and "historical_data_" in f]
        # files = Parallel(n_jobs=self.cores)(delayed(pd.read_csv)(f) for f in tqdm(filepaths))
        # tall = pd.concat(files)
        # del files
        filepaths = [f"{self.sub_folder}/{f}" for f in listdir(self.sub_folder) if f.endswith(".csv") and endDate in f and initDate in f and "historical_data_" in f]
        files = Parallel(n_jobs=self.cores)(delayed(pd.read_csv)(f) for f in tqdm(filepaths))
        
        # files = [f for f in files if len(f) > 0]  # filter out empty DataFrames
        if len(files) > 0:
            tall = pd.concat(files)        
            del files
        else:
            raise ValueError("No non-empty DataFrames found in files")  # raise an error if no non-empty DataFrames are found  # create an empty DataFrame if no non-empty DataFrames are found

        
        tall.rename(columns={"Unnamed: 0": "Date", "country": "Country", "indicator": "Category", "0": "Value"}, inplace=True)
        
        tall = tall[tall["Date"].notnull()]    
        tall.drop_duplicates(inplace=True)
        tall.reset_index(drop=True, inplace=True)
        tall["Category"] = [x.lower() for x in tall["Category"]]
        tall['Date'] = [x[0:7] if isinstance(x, str) else x for x in tall['Date']]
        tall["Date"] = [self.get_monthly_date(x) for x in tall["Date"]]

        
        print("\n(5/6) Aggregating submonthly indicators.")
        
        # Customize methods for indicators to fit daily/ weekly/ biweekly data into a monthly df
        submonthly_frequencies = ['Daily', 'Weekly', 'Biweekly']
        submonthly_combos = self.frequency_upper[self.frequency_upper['Frequency'].isin(submonthly_frequencies)][['Country', 'Category', 'Frequency']]
        
        indicators_to_max = ['Coronavirus Cases', 'Coronavirus Recovered', 'Coronavirus Deaths']
        indicators_to_sum = ['Initial Jobless Claims', 'Continuing Jobless Claims', 'Foreign Stock Investment', 'Foreign Bond Investment']
        indicators_to_avg = list(set(submonthly_combos['Category'].unique()) - set(indicators_to_sum) - set(indicators_to_max))
        
        self.max_indicators_tall_nonsparse = self.process_indicators(indicators_to_max, submonthly_combos, 'max')
        self.sum_indicators_tall_nonsparse = self.process_indicators(indicators_to_sum, submonthly_combos, 'sum')
        self.avg_indicators_tall_nonsparse = self.process_indicators(indicators_to_avg, submonthly_combos, 'mean')

        submonthly_max_filepath = f"{self.sub_folder}/max_indicators_tall_nonsparse_{initDate}_to_{endDate}.csv"
        submonthly_sum_filepath = f"{self.sub_folder}/sum_indicators_tall_nonsparse_{initDate}_to_{endDate}.csv"
        submonthly_avg_filepath = f"{self.sub_folder}/avg_indicators_tall_nonsparse_{initDate}_to_{endDate}.csv"
        
        self.max_indicators_tall_nonsparse.to_csv(submonthly_max_filepath)
        self.sum_indicators_tall_nonsparse.to_csv(submonthly_sum_filepath)
        self.avg_indicators_tall_nonsparse.to_csv(submonthly_avg_filepath)
        
        # Finalize table for submonthly indicators
        self.submonthly_indicators_nonsparse = pd.concat([self.max_indicators_tall_nonsparse, self.avg_indicators_tall_nonsparse, self.sum_indicators_tall_nonsparse], axis=0).reset_index(drop=True)
        c_tuples = {(submonthly_combos['Country'].iloc[ii], submonthly_combos['Category'].iloc[ii]) for ii in range(len(submonthly_combos))}
        tall.reset_index(drop=True, inplace=True)

        to_drop = [ii for ii in tall.index if (tall['Country'].loc[ii], tall['Category'].loc[ii]) in c_tuples]

        tall.drop(labels=to_drop, axis=0, inplace=True)
        tall = tall.append(self.submonthly_indicators_nonsparse, sort=['Country', 'Date', 'Category'])

        return tall
    
        
                        
    def wide_master_file(self, tall = None):
                        
        print('(6/6) Finalizing dataframe with a few more steps')
        print('converting master file to wide format.')

        if tall is None:
            tall = self.master_te_data_tall

        wide = pd.pivot_table(
            tall,
            index=['Date', 'Country'],
            columns='Category',
            values='Value'
        ).reset_index()

        wide.index.name = None

        return wide
                  
                        
    def sparsify(self, tall = None, wide = None):
        
        print('creating sparse wide file.')

        if tall is None:
            tall = self.master_te_data_tall
        if wide is None:
            wide = self.master_te_data_wide

        tall['Category'] = [x.lower() if isinstance(x, str) else x for x in tall['Category']]


        if len(min(tall['Date'])) == 10:
            hist_start = min(tall['Date'])[:-3]
            hist_end = max(tall['Date'])[:-3]
        else:
            hist_start = min(tall['Date'])
            hist_end = max(tall['Date'])

        unique_countries = self.unique_combos['Country'].unique()
        unique_countries.sort()

        delta = relativedelta(datetime.strptime(self.today_date, '%Y-%m-%d'), datetime.strptime(self.last_refresh_date, '%Y-%m-%d'))
        all_dates = set([datetime.strptime(self.last_refresh_date, '%Y-%m-%d') + relativedelta(months=x) for x in range(0, (12 * delta.years + delta.months))])
        unique_dates = [datetime.strftime(d, '%Y-%m') for d in all_dates]
        unique_dates.sort()

        # Create a new multi-index for both date and country.
        index = pd.MultiIndex.from_product([unique_countries, unique_dates], names=['Country', 'Date'])
        all_combos = pd.DataFrame(index=index).reset_index()

        wide_sparse = pd.merge(all_combos, wide, on=['Country', 'Date'], how='outer')
        wide_sparse["Date"] = [self.get_monthly_date(x) for x in wide_sparse["Date"]]

        return wide_sparse
    
    
    def backward_fill(self, country, category):

        self.master_te_data_wide_sparse_lower = self.master_te_data_wide_sparse_lower.reset_index(drop=True)
        
        try:
            cat_df = self.master_te_data_wide_sparse_lower.loc[self.master_te_data_wide_sparse_lower['Country'] == country][['Date', 'Country', category]]


            cat_df = cat_df.sort_values(by=['Date'])
            
            # Look for corresponding frequency of indicator
            try:
                cat_f = self.frequency[(self.frequency['Country'] == country) & (self.frequency['Category']==category)]['Frequency'].iloc[0]     
            except:
                cat_f = 'yearly'
                    
            fill_limits = {
                    'quarterly': 2,
                    'yearly': 11,
                    'biannually': 23,
                    'default': 11
                }
            
            # back fill with n months pre-defined
            try:
                if cat_f.lower() in fill_limits:
                    limit = fill_limits.get(cat_f.lower()) 
                    cat_df = cat_df.fillna(method='bfill', limit=limit)

            except KeyError:
                cat_df = cat_df.fillna(method='bfill')

            return cat_df
        except:
            print(country, category)
            pass

            
        
        
    def backward_fill_country(self, country):
        
        categories = self.unique_combos.loc[self.unique_combos['Country'] == country, 'Category']
        country_results = [self.backward_fill(country, cc.lower()) for cc in categories]
        merged_results = reduce(lambda left, right: pd.merge(left, right, on=['Date', 'Country'], how='outer'), country_results)
        
        
        return merged_results
    
    
    def backward_fill_all(self, df=None):
        
        if df is None:
            df = self.master_te_data_wide_sparse_lower
            
        print('backfilling data.')
        countries = self.unique_combos['Country'].unique()
        self.results = [self.backward_fill_country(country) for country in tqdm(countries)]
        self.results = pd.concat(self.results, ignore_index=True)

        return self.results
    
        print("Starting data pull.")
        
        
    def main(self):

        # Get all available combinations of country & indicators.
        self.get_all_country_indicators()
        
        # Get frequency column 
        self.get_indicator_frequency()

        # Get te data.
        self.get_TE_data()

        # Combine, widen, sparsify, and backfill the data.

        # Combine data:
        self.master_te_data_tall = self.tall_master_file()
        tall_filename = f'{self.sub_folder}/master_te_data_tall_{self.today_date}_abbreviated.csv'
        self.master_te_data_tall.to_csv(tall_filename)

        # Widen data:
        self.master_te_data_wide = self.wide_master_file()
        wide_filename = f'{self.sub_folder}/master_te_data_wide_{self.today_date}_abbreviated.csv'
        self.master_te_data_wide.to_csv(wide_filename)

        # Sparsify data:
        self.master_te_data_wide_sparse_lower = self.sparsify(tall=self.master_te_data_tall, wide=self.master_te_data_wide)
        sparse_wide_filename = f'{self.sub_folder}/master_te_data_wide_sparse_lower_{self.today_date}_abbreviated.csv'
        self.master_te_data_wide_sparse_lower.to_csv(sparse_wide_filename)

        # Backfill data:
        final = self.backward_fill_all(df=self.master_te_data_wide_sparse_lower)
        final = final[final['Date'] <= self.today_date[:7]]
        final_filename = f'{self.main_folder}/final_historical_te_data_{self.today_date}_abbreviated.csv'
        final.to_csv(final_filename)
        
        if self.copy_path_final_file != None:
            path2 = self.copy_path_final_file
            if not os.path.exists(path2):
                Path(path2).mkdir(parents=True, exist_ok=True)
            final.to_csv(path2+f'final_historical_te_data_{self.today_date}_abbreviated.csv')
            print(f'Saving final table to {path2}')
        
        print(f'Saving final table to {self.dir}{self.main_folder}')
        print('All done!')

In [3]:
##################################( Argument Assignment )##################################


# countries =  [ 'Albania','Angola', 'Armenia','Azerbaijan'
#               ,'Bangladesh', 'Belarus', 'Benin', 'Bolivia', 'Bosnia'
#               ,'Cambodia', 'Cameroon', 'China', 'Colombia', 'Congo'
#               ,'Ecuador', 'El Salvador', 'Ethiopia'
#               ,'Georgia', 'Ghana', 'Guatemala'
#               ,'Honduras', 'Hungary'
#               ,'India', 'Indonesia'
#               ,'Jamaica'
#               ,'Kazakhstan', 'Kenya', 'Kosovo'
#               ,'Liberia'
#               ,'Malawi', 'Malaysia', 'Mali', 'Mauritania', 'Mexico', 'Morocco', 'Mozambique', 'Myanmar'
#               ,'Nicaragua', 'Niger', 'Nigeria'
#               ,'Paraguay', 'Philippines'
#               ,'Russia', 'Rwanda'
#               ,'Senegal', 'Serbia', 'South Africa', 'South Sudan', 'Sri Lanka'
#               ,'Tanzania', 'Thailand', 'Turkey', 'Tunisia'
#               ,'Uganda', 'Ukraine','Uzbekistan'
#               ,'Yemen'
#               ,'Zambia', 'Zimbabwe'
#             ]

#
directory_path = '/Users/zungrulin/Desktop/whatever/'        # Working directory
sub_file_storing_folder = 'Sub_Folder'                        # Folder for sub files in working directory 
main_file_storing_folder = 'Main_Folder'                      # Folder for final files in working directory 

today_date = datetime.now().strftime("%Y-%m-%d")               
last_refresh_date = '2005-01-01'                               
from_scratch = 1                                               
exisiting_data = None

copy_path_final_file = None #'/home/ml4p/Dropbox/Dropbox/ML for Peace/forecasting_te_data/'

########################################################################################################################

In [26]:
df = pd.read_csv('/Users/zungrulin/Desktop/whatever/all.csv')

In [55]:

countries = ['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 
              'Argentina', 'Armenia', 'Aruba', 'Australia', 
             'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 
             'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 
             'Botswana','Brazil', 
             'Bulgaria', 'Burkina Faso', 'Burundi', 
             'Cambodia', 'Cameroon', 'Canada', 'Cayman Islands', 'Central African Republic', 
             'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 
             'Congo',  'Costa Rica',
             'Croatia', 'Cuba', 'Cyprus', 'Denmark', 'Djibouti', 'Dominica', 
             'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Eritrea', 
             'Estonia', 'Ethiopia', 'Faroe Islands', 'Fiji', 'Finland', 
             'France', 'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 
             'Germany', 'Ghana', 'Greece', 'Greenland', 'Grenada', 'Guam', 'Guatemala',
             'Guinea',  'Guyana', 'Haiti',
             'Honduras', 'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 
             'Ireland', 'Isle of Man', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 
             'Kiribati', "North Korea", 'South Korea', 'Kuwait', 'Kyrgyzstan', 
              'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 
             'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia', 
             'Maldives', 'Mali', 'Malta', 'Mauritania', 'Mauritius', 'Mayotte', 
             'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 
              'Morocco', 'Mozambique', 'Myanmar', 'Namibia','Nepal', 'Netherlands', 'New Caledonia', 
             'New Zealand', 'Nicaragua', 'Niger', 'Nigeria',   
             'Northern Mariana Islands', 'Norway', 'Oman', 'Pakistan', 'Palau', 'Palestine', 'Panama', 
             'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Puerto Rico', 
             'Qatar', 'Romania', 'Russia', 'Rwanda',   
             'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 
             'Singapore',  'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 
             'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 
             'Sweden', 'Switzerland', 'Syria', 'Taiwan', 
             'Tajikistan', 'Tanzania', 'Thailand', 'Togo', 'Tonga', 
             'Tunisia', 'Turkey', 'Turkmenistan',  'Uganda', 
             'Ukraine', 'United Arab Emirates', 'United Kingdom', 
             'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu', 
             'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe']




In [41]:
if __name__ == "__main__":

    te_object = TradingEconomics(directory_path = directory_path,
                                 sub_file_storing_folder = sub_file_storing_folder,
                                 main_file_storing_folder = main_file_storing_folder,
                                 today_date = today_date,
                                 last_refresh_date = last_refresh_date,
                                 from_scratch = from_scratch,
                                 exisiting_data = exisiting_data,
                                 countries = countries,
                                 copy_path_final_file = copy_path_final_file
                                )

    te_object.main()

Countries for update:
- Afghanistan
- Albania
- Algeria
- Andorra
- Angola
- Argentina
- Armenia
- Aruba
- Australia
- Austria
- Azerbaijan
- Bahamas
- Bahrain
- Bangladesh
- Barbados
- Belarus
- Belgium
- Belize
- Benin
- Bermuda
- Bhutan
- Bolivia
- Botswana
- Brazil
- Bulgaria
- Burkina Faso
- Burundi
- Cambodia
- Cameroon
- Canada
- Cayman Islands
- Central African Republic
- Chad
- Chile
- China
- Colombia
- Comoros
- Congo
- Costa Rica
- Croatia
- Cuba
- Curaçao
- Cyprus
- Denmark
- Djibouti
- Dominica
- Dominican Republic
- Ecuador
- Egypt
- El Salvador
- Eritrea
- Estonia
- Ethiopia
- Faroe Islands
- Fiji
- Finland
- France
- French Polynesia
- Gabon
- Gambia
- Georgia
- Germany
- Ghana
- Greece
- Greenland
- Grenada
- Guam
- Guatemala
- Guinea
- Guinea-Bissau
- Guyana
- Haiti
- Honduras
- Hong Kong
- Hungary
- Iceland
- India
- Indonesia
- Iran
- Iraq
- Ireland
- Isle of Man
- Israel
- Italy
- Jamaica
- Japan
- Jordan
- Kazakhstan
- Kenya
- Kiribati
- North Korea
- South Korea

  self.all_country_indicators = self.all_country_indicators.append(temp)


HTTP Error 409: Conflict
Belgium  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_in

Montserrat  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_in

Lao  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_in

  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_in

Réunion  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)


Tokelau  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_in

Guinea-Bissau  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_in

Curaçao  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)


Pitcairn  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Saint Vincent and the Grenadines  encountered HTTP error


100%|█████████████████████████████████████████| 196/196 [10:19<00:00,  3.16s/it]


9 countries left: Rerun ['Montserrat', 'Belgium', 'Lao', 'Tokelau', 'Curaçao', 'Guinea-Bissau', 'Réunion', 'Pitcairn', 'Saint Vincent and the Grenadines']


  self.all_country_indicators = self.all_country_indicators.append(temp)


Montserrat  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)
  self.all_country_indicators = self.all_country_indicators.append(temp)


Lao  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Tokelau  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Curaçao  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Guinea-Bissau  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Réunion  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Pitcairn  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Saint Vincent and the Grenadines  encountered HTTP error


100%|█████████████████████████████████████████████| 9/9 [00:20<00:00,  2.30s/it]


8 countries left: Rerun ['Montserrat', 'Lao', 'Tokelau', 'Curaçao', 'Guinea-Bissau', 'Réunion', 'Pitcairn', 'Saint Vincent and the Grenadines']


  self.all_country_indicators = self.all_country_indicators.append(temp)


Montserrat  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Lao  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Tokelau  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Curaçao  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Guinea-Bissau  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Réunion  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Pitcairn  encountered HTTP error


  self.all_country_indicators = self.all_country_indicators.append(temp)


Saint Vincent and the Grenadines  encountered HTTP error


100%|█████████████████████████████████████████████| 8/8 [00:17<00:00,  2.17s/it]


8 countries left: Rerun ['Montserrat', 'Lao', 'Tokelau', 'Curaçao', 'Guinea-Bissau', 'Réunion', 'Pitcairn', 'Saint Vincent and the Grenadines']
['Montserrat', 'Lao', 'Tokelau', 'Curaçao', 'Guinea-Bissau', 'Réunion', 'Pitcairn', 'Saint Vincent and the Grenadines'] has no response from the API

(2/6) Building frequency table for indicators.


100%|█████████████████████████████████████████| 489/489 [25:49<00:00,  3.17s/it]



(3/6) Pulling Trading Economic data.


 11%|████▍                                   | 54/489 [14:24<2:37:33, 21.73s/it]

2 tries failed for indicator: Credit Rating
3 tries failed for indicator: Credit Rating
4 tries failed for indicator: Credit Rating
5 tries failed for indicator: Credit Rating
2 tries failed for indicator: Credit Rating
3 tries failed for indicator: Credit Rating
4 tries failed for indicator: Credit Rating
5 tries failed for indicator: Credit Rating
2 tries failed for indicator: Credit Rating
3 tries failed for indicator: Credit Rating
4 tries failed for indicator: Credit Rating
5 tries failed for indicator: Credit Rating
2 tries failed for indicator: Credit Rating
3 tries failed for indicator: Credit Rating
4 tries failed for indicator: Credit Rating
5 tries failed for indicator: Credit Rating
2 tries failed for indicator: Credit Rating
3 tries failed for indicator: Credit Rating
4 tries failed for indicator: Credit Rating
5 tries failed for indicator: Credit Rating
2 tries failed for indicator: Credit Rating
3 tries failed for indicator: Credit Rating
4 tries failed for indicator: Cr

100%|█████████████████████████████████████████| 489/489 [46:07<00:00,  5.66s/it]



(4/6) Building tall master file.


100%|███████████████████████████████████| 14097/14097 [00:01<00:00, 8014.12it/s]



(5/6) Aggregating submonthly indicators.
Taking only the max value of a month for 3 indicators.


100%|█████████████████████████████████████████████| 3/3 [00:05<00:00,  1.83s/it]


Taking only the sum value of a month for 4 indicators.


100%|█████████████████████████████████████████████| 4/4 [00:00<00:00,  4.22it/s]


Taking only the mean value of a month for 61 indicators.


100%|███████████████████████████████████████████| 61/61 [01:06<00:00,  1.09s/it]
  tall = tall.append(self.submonthly_indicators_nonsparse, sort=['Country', 'Date', 'Category'])
  tall = tall.append(self.submonthly_indicators_nonsparse, sort=['Country', 'Date', 'Category'])


(6/6) Finalizing dataframe with a few more steps
converting master file to wide format.
creating sparse wide file.
backfilling data.


100%|█████████████████████████████████████████| 188/188 [04:38<00:00,  1.48s/it]


Saving final table to /Users/zungrulin/Desktop/whatever/Main_Folder
All done!


In [4]:
# log in 
dir_1 = '/Users/zungrulin/Desktop/whatever/' 
os.chdir(dir_1)
if not os.path.isfile(dir_1 + 'apikey.txt'):
    raise FileNotFoundError(f'File "apikey.txt" is not present in the assigned directory {dir_1} ')
    
if os.path.exists('apikey.txt'):
    with open('apikey.txt', 'r') as apikeyfile:
        apikey = apikeyfile.read()
else:
    raise FileNotFoundError(f'apikey.txt not found in the assigned directory  {dir_1} ')

te.login(apikey)

'Logged with E2CD74D2D48E4EA'

In [52]:
# currency = []
# for key in all_index.keys():
#     if (' Billion' in key and len(key)==11) or (' Million' in key and len(key)==11) or (' Thousand' in key and len(key)==12) or '/' in key:
#         cu = key[:3]
#         if cu not in currency:
#             currency.append(cu)
# print(currency)
    
# for i in currency:
#     if i in currecy_df['Ticker']:
#         pass
#     else:
#         print(i)
    
# currency  

['GBP', 'KRW', 'RUB', 'PLN', 'NZD', 'EUR', 'MDL', 'MYR', 'BGN', 'BRL', 'AUD', 'THB', 'SEK', 'ZAR', 'MNT', 'USD', 'KGS', 'JPY', 'IDR', 'HKD', 'CAD', 'TWD', 'RON', 'KZT', 'CUP', 'CNY', 'AZN', 'AMD', 'VND', 'TJS', 'DZD', 'UZS', 'NOK', 'NIO', 'ISK', 'DKK', 'BWP', 'SGD', 'XOF', 'CRC', 'ALL', 'RSD', 'MXN', 'ILS', 'HUF', 'CLP', 'UYU', 'CHF', 'PEN', 'PAB', 'GEL', 'BYN', 'BHD', 'TRY', 'MUR', 'ARS', 'KWD', 'SAR', 'JOD', 'KMF', 'QAR', 'INR', 'EGP', 'UAH', 'PYG', 'MZN', 'BOB', 'AOA', 'VEF', 'TND', 'NGN', 'LKR', 'PGK', 'OMR', 'COP', 'BDT', 'PHP', 'MMK', 'KES', 'PKR', 'BIF', 'UGX', 'AED', 'SSP', 'MRU', 'KYD', 'BMD', 'ZMW', 'XPF', 'XAF', 'AWG', 'MWK', 'LYD', 'BTN', 'MAD', 'BBD', 'MGA', 'DJF', 'KHR', 'BSD', 'NPR', 'LSL', 'FJD', 'NAD', 'GNF', 'IRR', 'AFN', 'RWF', 'LBP', 'GHS', 'TZS', 'HNL', 'DOP', 'GTQ', 'SYP', 'SCR', 'SRD', 'GYD', 'ETB', 'KPW', 'IQD', 'GMD', 'CDF', 'BZD', 'MVR', 'VES', 'JMD', 'SLL', 'LRD', 'SDG', 'BBL']
GBP
KRW
RUB
PLN
NZD
EUR
MDL
MYR
BGN
BRL
AUD
THB
SEK
ZAR
MNT
USD
KGS
JPY
IDR
HKD
CA

['GBP',
 'KRW',
 'RUB',
 'PLN',
 'NZD',
 'EUR',
 'MDL',
 'MYR',
 'BGN',
 'BRL',
 'AUD',
 'THB',
 'SEK',
 'ZAR',
 'MNT',
 'USD',
 'KGS',
 'JPY',
 'IDR',
 'HKD',
 'CAD',
 'TWD',
 'RON',
 'KZT',
 'CUP',
 'CNY',
 'AZN',
 'AMD',
 'VND',
 'TJS',
 'DZD',
 'UZS',
 'NOK',
 'NIO',
 'ISK',
 'DKK',
 'BWP',
 'SGD',
 'XOF',
 'CRC',
 'ALL',
 'RSD',
 'MXN',
 'ILS',
 'HUF',
 'CLP',
 'UYU',
 'CHF',
 'PEN',
 'PAB',
 'GEL',
 'BYN',
 'BHD',
 'TRY',
 'MUR',
 'ARS',
 'KWD',
 'SAR',
 'JOD',
 'KMF',
 'QAR',
 'INR',
 'EGP',
 'UAH',
 'PYG',
 'MZN',
 'BOB',
 'AOA',
 'VEF',
 'TND',
 'NGN',
 'LKR',
 'PGK',
 'OMR',
 'COP',
 'BDT',
 'PHP',
 'MMK',
 'KES',
 'PKR',
 'BIF',
 'UGX',
 'AED',
 'SSP',
 'MRU',
 'KYD',
 'BMD',
 'ZMW',
 'XPF',
 'XAF',
 'AWG',
 'MWK',
 'LYD',
 'BTN',
 'MAD',
 'BBD',
 'MGA',
 'DJF',
 'KHR',
 'BSD',
 'NPR',
 'LSL',
 'FJD',
 'NAD',
 'GNF',
 'IRR',
 'AFN',
 'RWF',
 'LBP',
 'GHS',
 'TZS',
 'HNL',
 'DOP',
 'GTQ',
 'SYP',
 'SCR',
 'SRD',
 'GYD',
 'ETB',
 'KPW',
 'IQD',
 'GMD',
 'CDF',
 'BZD',
 'MVR',


In [30]:
# 
index_df = pd.read_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/(1)index_table.csv')
currecy_df = pd.read_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/markets_currency.csv')
for key in all_index.keys():
    print(key)

GBP/Week
KRW/Month
RUB/Month
points
PLN/Month
NZD/Hour
EUR/Month
MDL/Month
MYR/Month
EUR/Week
BGN/Month
BRL/Month
AUD/Week
THB/Month
SEK/Hour
ZAR/Month
MNT Thousand/Month
USD/Hour
KGS/Month
JPY/Month
IDR/Month
HKD/Month
CAD/Hour
TWD/Month
RON/Month
KZT/Month
GEL
CUP/Month
CNY/Year
AZN/Month
AMD/Month
VND Thousand/Month
TJS/Month
DZD/Month
UZS Thousand/Month
NOK/Month
NIO Thousands/Month
ISK/Month
USD/Month
DKK/Month
BWP/Month
SGD/Month
XOF/Month
EUR/Year
CRC/Month
ALL/Month
RSD/Month
MXN/Day
ILS/Month
HUF/Month
CLP/Hour
UYU/Month
CHF/Month
PEN/Month
PAB/Month
GEL/Month
BYN/Month
BHD/Month
TRY/Month
MUR/Month
ARS/Month
percent
Thousand
Persons
Tens of Thousands
Million
Points
celsius
USD Million
EUR Million
EUR Thousand
ISK Million
THB Billion
SEK Billion
USD Thousand
PAB Thousand
KWD Million
USD Thousands
SAR Million
MUR Million
JOD Million
KMF Million
mm
million
Percent
GBP/Hour
QAR/Month
EUR/Hour
INR/Day
EGP/Month
AUD/week
UAH/Month
KRW/Hour
PYG Thousand/Month
MZN/Month
JPY/Hour
HKD/

In [54]:
# get all type of currency
currency = []
for key in all_index.keys():
    if (' Billion' in key and len(key)==11) or (' Million' in key and len(key)==11) or (' Thousand' in key and len(key)==12) or '/' in key:
        cu = key[:3]
        if cu not in currency:
            currency.append(cu)


# get currency dict: (currency: rate)
cur_dict = {'USD':1, 
            "BGN":0.55, 
            "CUP": 0.042,
            "UYU": 0.025,
            "BYN": 0.31,
            "KWD": 3.26,
            "JOD": 1.41,
            "VEF": 0.0028,
            "MRU": 0.026,
            "BZD": 0.5,
            "VES": 0.0288,
            "BBL": 0.025}
for i, symbol in enumerate(currecy_df['Symbol']):
    if symbol == 'DXY:CUR':
        pass
    elif symbol[:3] == 'USD':
        key = symbol[3:6]
        if key not in cur_dict:
            cur = float(1/currecy_df.iloc[i, 6])
            cur_dict[key] = cur
    elif symbol[3:6] == 'USD':
        key = symbol[:3]
        if key not in cur_dict:
            cur = float(currecy_df.iloc[i, 6])
            cur_dict[key] = cur
cur_dict       

{'USD': 1,
 'BGN': 0.55,
 'CUP': 0.042,
 'UYU': 0.025,
 'BYN': 0.31,
 'KWD': 3.26,
 'JOD': 1.41,
 'VEF': 0.0028,
 'MRU': 0.026,
 'BZD': 0.5,
 'VES': 0.0288,
 'BBL': 0.025,
 'EUR': 1.06727,
 'GBP': 1.22579,
 'AUD': 0.63489,
 'NZD': 0.58572,
 'JPY': 0.006682437218502333,
 'CNY': 0.13690153494000976,
 'CHF': 1.1227376835676113,
 'CAD': 0.7309407207075506,
 'MXN': 0.05515722843752258,
 'INR': 0.01204500012045,
 'BRL': 0.19944951932665844,
 'RUB': 0.010582514511273023,
 'KRW': 0.0007446071824808822,
 'TRY': 0.035605650331862464,
 'IDR': 6.277463904582549e-05,
 'SAR': 0.266588467382901,
 'SEK': 0.09121091612244153,
 'NGN': 0.0011325028312570782,
 'PLN': 0.23944811997308604,
 'ARS': 0.002856979601165648,
 'NOK': 0.09037750684609615,
 'TWD': 0.03099717925668764,
 'IRR': 2.380952380952381e-05,
 'AED': 0.2723014922121773,
 'COP': 0.0002373830888287518,
 'THB': 0.027631942525559547,
 'ZAR': 0.05284188966824805,
 'DKK': 0.14300709601210412,
 'MYR': 0.20920502092050208,
 'SGD': 0.7317697852255681,


In [21]:
row_ele = {'Unit Group':None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}

all_index = {k:row_ele.copy() for k in index_df['Unit'].unique() if type(k)==str}


all_index['General'] = {'Unit Group':'General', 'Frequency Group': 'NA', 'Frequency Rate': 1, 'Exchange Rate': 1, 'Conversion Rate': 1}

print(all_index)

{'GBP/Week': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'KRW/Month': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'RUB/Month': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'points': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'PLN/Month': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'NZD/Hour': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'EUR/Month': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'MDL/Month': {'Unit Group': None, 'Frequency Group': None, 'Frequency Rate': 0, 'Exchange Rate': 0, 'Conversion Rate': 0}, 'MYR/Month': {'Unit 

In [83]:
for key, value in all_index.items():
    key_lower = key.lower()
    currency_type = key[:3]
    
    if '/year' in key_lower and len(key_lower)==8:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]
        
        
    elif '/month' in key_lower and len(key_lower)==9:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 12
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]
        
    elif ' thousand/month'  in key_lower and len(key_lower)==18:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 12
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]*1000
    
    elif ' thousands/month'  in key_lower and len(key_lower)==19:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 12
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]*1000
    
    elif ' million/month'  in key_lower and len(key_lower)==17:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 12
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]*1000000
        
    elif '/week' in key_lower and len(key_lower)==8:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 51
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]
    
    elif '/day' in key_lower and len(key_lower)==7:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 365
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]
    
    elif '/hour' in key_lower and len(key_lower)==8:
        all_index[key]['Frequency Group'] = '/Year'
        all_index[key]['Frequency Rate'] = 8760
        all_index[key]['Unit Group'] = 'USD/YEAR'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]
        
    elif ' thousand' in key_lower and len(key_lower)== 12:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]*1000
        
    elif ' thousands' in key_lower and len(key_lower)== 13:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] = cur_dict[currency_type]*1000
    
    elif ' million' in key_lower and len(key_lower)== 11:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  cur_dict[currency_type]*1000000
    
    elif ' hundred million' in key_lower and len(key_lower)== 19:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  cur_dict[currency_type]*100000000
        
    elif ' billion' in key_lower and len(key_lower)== 11:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  cur_dict[currency_type]*1000000000
    
    elif key_lower == 'billion usd':
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  1000000000
        
    elif ' trillion' in key_lower and len(key_lower)== 12:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  cur_dict[currency_type]*1000000000000
        
    elif key_lower == 'trillion usd':
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD '
        all_index[key]['Exchange Rate'] =  1000000000000
        
    elif key_lower.upper() == key and len(key)==3:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  cur_dict[currency_type]
        
    elif key_lower == 'inr tens of million':
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = 'USD'
        all_index[key]['Exchange Rate'] =  cur_dict['INR']*10000000
        

    
    else:
        all_index[key]['Frequency Group'] = 'NA'
        all_index[key]['Frequency Rate'] = 1
        all_index[key]['Unit Group'] = key
        all_index[key]['Exchange Rate'] = 1
    
    all_index[key]['Conversion Rate'] = all_index[key]['Frequency Rate'] * all_index[key]['Exchange Rate']
        
        
    
    

In [84]:
df = pd.DataFrame(all_index).T
df.to_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/Unit_Conversion.csv')

In [77]:
for key, value in all_index.items():
    if all_index[key]['Unit Group'] == None:
        print(key)

points
percent
Thousand
Persons
Tens of Thousands
Million
Points
celsius
mm
million
Percent
EUR/SQ. METRE
Index
PHP/SQ. METRE
percent of GDP
USD/Liter
thousands
Number of persons
Hundred
Billion BDT
thousand points
Index Points
doses
doses per 100 people
million points
Units
Thousands Square Metre
Tonnes
Gigawatt-hour
%
SIPRI TIV Million
Thousands
Years
thousand
per 1000 people
Thousand units
dwellings
index points
per one million people
percentage points
Thousands Person
BBL/D/1K
KT
Hundreds
Companies
Companies and Individuals
Individuals
Terajoule
Thousand Tonnes
persons
Hours
units
Million Square Metre
Tens of Thousands Square Metre
Net Balance
GBP/MWh
EUR/MWh
Million Units
Thousand Units
Hundred Units
Thousands of Ton
Thousands of Tonnes
Ten Thousands of Tonnes
Kg
Thousand Barrels Per Da
Thousand Barrels
billion cubic feet
Billion Bushels
BBL/1Million


In [119]:
trading_df2 = pd.read_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/(2)full_us_trading_data.csv')
# trading_df.drop(['Unnamed: 0'],axis=1)

In [110]:
print(trading_df.shape[0])
trading_df.columns
trading_df.drop_duplicates(inplace=True)
trading_df = trading_df.reset_index(drop=True)
trading_df = trading_df.set_index('symbol')
print(trading_df.shape[0])

380200

In [121]:
trading_dict = {symbol:0 for symbol in trading_df2['symbol']}
for symbol in trading_df2['symbol']:
    trading_dict[symbol]+=1

In [130]:
sorted_dict = {k:v for k,v in sorted(trading_dict.items(), key=lambda item: item[1], reverse=True)}

count1 = sum([1 if v==1 else 0 for k,v in trading_dict.items()])
count2 = sum([1 if v==2 else 0 for k,v in trading_dict.items()])
        
print(count1, count2)

# Assuming you want to drop a column named 'column_to_drop'
trading_df.drop('Unnamed: 0', axis=1, inplace=True)

# Drop duplicates
trading_df.drop_duplicates(inplace=True)
trading_df.to_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/(2)updated_full_us_trading_data.csv')

197726 91237


In [148]:
unique_combinations = trading_df[['country2', 'type', 'category']].drop_duplicates()
unique_combinations.shape[0]

288564

In [153]:
duplicated_rows = trading_df[trading_df.duplicated(subset=['country2', 'type', 'category'], keep=False)]


In [159]:
duplicated_rows.sort_values(by=['country2', 'type', 'category'])

Unnamed: 0_level_0,country1,country2,value,date,type,category,url,title,StartDate,lastupdate
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
USAAFG48092,United States,Afghanistan,5720,2020,Export,"Carbon Paper, Self-copy Paper, Other Copying o...",/united-states/exports/afghanistan/paper-carbo...,"United States exports of carbon paper, self-co...",1998,2022-02-16T09:00:00
USAAFG48162,United States,Afghanistan,6780,2018,Export,"Carbon Paper, Self-copy Paper, Other Copying o...",/united-states/exports/afghanistan/paper-carbo...,"United States exports of carbon paper, self-co...",2005,2022-02-15T13:02:00
USAAFG85362,United States,Afghanistan,129437,2022,Export,Electrical Apparatus for Switching or Protecti...,/united-states/exports/afghanistan/electrical-...,United States exports of electrical apparatus ...,1994,2023-06-16T09:21:00
USAAFG85352,United States,Afghanistan,82853,2021,Export,Electrical Apparatus for Switching or Protecti...,/united-states/exports/afghanistan/electrical-...,United States exports of electrical apparatus ...,1994,2022-02-24T09:00:00
USAAFG85361,United States,Afghanistan,82042,2022,Import,Electrical Apparatus for Switching or Protecti...,/united-states/imports/afghanistan/electrical-...,United States Imports from Afghanistan of Elec...,1997,2023-06-16T09:21:00
...,...,...,...,...,...,...,...,...,...,...
USAVNM85353,United States,Vietnam,1491374,2022,Re-export,Electrical Apparatus for Switching or Protecti...,/united-states/reexports/vietnam/electrical-ap...,United States re-exports of electrical apparat...,1999,2023-06-16T10:54:00
USAYEM85362,United States,Yemen,7109,2022,Export,Electrical Apparatus for Switching or Protecti...,/united-states/exports/yemen/electrical-appara...,United States exports of electrical apparatus ...,1991,2023-06-16T10:55:00
USAYEM85352,United States,Yemen,66692,2021,Export,Electrical Apparatus for Switching or Protecti...,/united-states/exports/yemen/electrical-appara...,United States exports of electrical apparatus ...,1991,2022-02-24T10:52:00
USAZMB85352,United States,Zambia,3000,2022,Export,Electrical Apparatus for Switching or Protecti...,/united-states/exports/zambia/electrical-appar...,United States exports of electrical apparatus ...,1991,2023-06-16T10:55:00


In [3]:
from os import listdir
from os.path import isfile, join
mypath = '/Users/zungrulin/Desktop/whatever/Sub_Folder/'
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

In [20]:
subclass_files = sorted([file for file in onlyfiles if file[0]=='(' and file[-3:]=='csv' and len(file)<=20])[1:]
# print(subclass_files)
for file in subclass_files:
    filepath = mypath + file
    df = pd.read_csv(filepath)
    dates = [dateparser.parse(date) for date in df['Date']]
    df['Year'] = [date.year for date in dates]
    df['Month'] = [date.month for date in dates]
    savepath = '/Users/zungrulin/Desktop/whatever/Project_Data/' + file
    df.to_csv(savepath)
    
    
    

In [37]:

index_df = pd.read_csv(mypath+'(1)index_table.csv')
symbol_dict = {(row[1]['Country'],row[1]['Category']):row[1]['HistoricalDataSymbol'] for row in index_df.iterrows()}
symbol_dict 



{('United Kingdom', 'Wages in Manufacturing'): 'UNITEDKINWAGINMAN',
 ('South Korea', 'Wages in Manufacturing'): 'SOUTHKOREWAGINMAN',
 ('Russia', 'Wages in Manufacturing'): 'RUSSIAWAGINMAN',
 ('Portugal', 'Wages in Manufacturing'): 'PORTUGALWAGINMAN',
 ('Poland', 'Wages in Manufacturing'): 'POLANDWAGINMAN',
 ('Norway', 'Wages in Manufacturing'): 'NORWAYWAGINMAN',
 ('New Zealand', 'Wages in Manufacturing'): 'NEWZEALANWAGINMAN',
 ('Montenegro', 'Wages in Manufacturing'): 'MONTENEGROWAGINMAN',
 ('Moldova', 'Wages in Manufacturing'): 'MOLDOVAWAGINMAN',
 ('Malaysia', 'Wages in Manufacturing'): 'MALAYSIAWAGINMAN',
 ('Ireland', 'Wages in Manufacturing'): 'IRELANDWAGINMAN',
 ('Estonia', 'Wages in Manufacturing'): 'ESTONIAWAGINMAN',
 ('Denmark', 'Wages in Manufacturing'): 'DENMARKWAGINMAN',
 ('Bulgaria', 'Wages in Manufacturing'): 'BULGARIAWAGINMAN',
 ('Brazil', 'Wages in Manufacturing'): 'BRAZILWAGINMAN',
 ('Australia', 'Wages in Manufacturing'): 'AUSTRALIAWAGINMAN',
 ('Thailand', 'Wages in Man

In [35]:
row[1]['Country']

'Turkey'

In [41]:
mypath2 = '/Users/zungrulin/Desktop/whatever/Project_Data/'
onlyfiles2 = [f for f in listdir(mypath2) if isfile(join(mypath2, f))]
subclass_files2 = sorted([file for file in onlyfiles2 if file[0]=='(' and file[-3:]=='csv' and len(file)<=20])[1:]
# print(subclass_files)
for file in subclass_files2:
    filepath = mypath2 + file
    df = pd.read_csv(filepath)
    df['HistoricalDataSymbol'] = [symbol_dict[(row[1]['Country'],row[1]['Category'])] for row in df.iterrows()]
    df.index = range(1, len(df) + 1)
    df.index.name = 'CCID'
    df = df.drop(columns=['Unnamed: 0'])
    df = df[['HistoricalDataSymbol','Country','Category','Date','Year', 'Month','Value']]                              
    savepath = mypath2 + file
    print('Saving ', savepath)
    df.to_csv(savepath)

Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(11)Consumer.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(12)Taxes.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(13)Markets.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(14)Climate.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(15)Trade.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(3)Labour.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(4)Health.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(5)GDP.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(6)Housing.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(7)Money.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(8)Government.csv
Saving  /Users/zungrulin/Desktop/whatever/Project_Data/(9)Business.csv


In [55]:
country_df = pd.read_csv(mypath+'(0)all_country_list.csv')
country_df.index = range(1, len(country_df) + 1)
country_df.index.name = 'CID'
country_df = country_df.drop(columns=['Unnamed: 0'])
country_df.to_csv(mypath2 + '(0)all_country_list.csv')

In [76]:
index_df.index = range(1, len(index_df) + 1)
index_df.index.name = 'ID'
index_df['Frequency'] = ['Quarterly' if f=='Quaterly' else f for f in index_df['Frequency'] ]
# index_df = index_df.drop(columns=['Unnamed: 0'])
index_df.to_csv(mypath2 + '(1)index_table.csv')

In [71]:
trading_df = pd.read_csv(mypath+'(2)updated_full_us_trading_data.csv')
print(len(trading_df))
# Sort by date and value in descending order
trading_df = trading_df.sort_values(by=['date', 'value'], ascending=[False, False])

# Drop duplicates based on 'country2', 'type', 'category' and keep the first occurrence
trading_df = trading_df.drop_duplicates(subset=['country2', 'type', 'category'], keep='first')
trading_df.columns = trading_df.columns.str.capitalize()
trading_df.index = trading_df['Symbol']
trading_df = trading_df.drop('Symbol',axis=1)
trading_df = trading_df.rename(columns={'Date': 'Year'})
trading_df = trading_df.sort_values(by=['Country2','Category'])
# trading_df = trading_df.drop(columns=['Unnamed: 0'])
print(len(trading_df))
trading_df.to_csv(mypath2 + '(2)US_trading_data.csv')

288963
288564


In [87]:
c_df = pd.read_csv(mypath2+'(0-1)country_stats.csv')
c_df = c_df[['Country','Region','Access to improved drinking water (2020)','Access to unimproved drinking water (2020)','Access to improved sanitation (2020)','Access to unimproved sanitation facilities (2020)','Urban_population (2020)','Rural_population (2020)','Population (2020)','Population of children under the age of 1 (2020)','Population of children under the age of 15 (2020)','Population aged 15 to 64 years (2020)']]



In [88]:
c_df.columns = ['Country', 'Region', 'Improved_Drinking_Water_Access', 'Unimproved_Drinking_Water_Access', 'Improved_Sanitation_Access', 'Unimproved_Sanitation_Access',
                'Urban_Population', 'Rural_Population', 'Population', 'Population_under_1yo', 'Population_under_15yo',
                'Population_aged_15~64']
c_df.index =  c_df['Country']
c_df = c_df.drop(['Country'],axis=1)
c_df.to_csv(mypath2 +'(0-1)country_stats.csv')
    

In [89]:
for i in c_df.columns:
    print(i)

Region
Improved_Drinking_Water_Access
Unimproved_Drinking_Water_Access
Improved_Sanitation_Access
Unimproved_Sanitation_Access
Urban_Population
Rural_Population
Population
Population_under_1yo
Population_under_15yo
Population_aged_15~64


In [19]:
import dateparser
dateparser.parse(df['Date'][0])

datetime.datetime(2005, 1, 26, 0, 0)

In [57]:
df_all = pd.DataFrame(all_c)
row = []
for c in df_all['Country']:
    if c in countries:
        row.append(1)
    else:
        row.append(0)
df_all['Included'] = row
# df_all.to_csv('/Users/zungrulin/Desktop/whatever/all_countries.csv')

In [66]:
df_tall = pd.read_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/(2)tall_data(needs categorization).csv')

In [67]:
docs_tall = df_tall.to_dict('records')

In [69]:
df = pd.read_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/(1)index_table.csv')
docs = df.to_dict('records')
dic = {}
for doc in docs:
    cat = doc['Category']
    cat_group = doc['CategoryGroup']
    if cat_group not in dic:
        dic[cat_group] = {cat}
    elif cat not in dic[cat_group]:
        dic[cat_group].add(cat)
    else:
        pass
        

In [72]:
df_tall.head()

Unnamed: 0.1,Unnamed: 0,Category,Country,Date,Value
0,0,government budget value,Norway,2005-03,-22116.0
1,1,government budget value,Norway,2005-06,118329.0
2,2,government budget value,Norway,2005-09,-28561.0
3,3,government budget value,Norway,2005-12,143131.0
4,4,government budget value,Norway,2006-03,-1346.0


In [81]:
def find_key_and_value(target_value, my_dict):
    for key, values in my_dict.items():
        for value in values:
            if target_value.lower() == value.lower():
                return key, value
    return None, None

find_key_and_value('government budget value', dic)

('Government', 'Government Budget Value')

In [82]:
final_dic = {key: [] for key in dic.keys()}

def find_key_and_value(target_value, my_dict):
    for key, values in my_dict.items():
        for value in values:
            if target_value.lower() == value.lower():
                return key, value
    return None, None


for index, doc_tall in enumerate(docs_tall):
    
    table, ind = find_key_and_value(doc_tall['Category'], dic)
    new_doc = doc_tall.copy()
    new_doc['Category'] = ind
    final_dic[table].append(new_doc)
    if (index+1)%1000 == 0:
        print(index+1,'/',len(docs_tall))
    
    

1000 / 2143768
2000 / 2143768
3000 / 2143768
4000 / 2143768
5000 / 2143768
6000 / 2143768
7000 / 2143768
8000 / 2143768
9000 / 2143768
10000 / 2143768
11000 / 2143768
12000 / 2143768
13000 / 2143768
14000 / 2143768
15000 / 2143768
16000 / 2143768
17000 / 2143768
18000 / 2143768
19000 / 2143768
20000 / 2143768
21000 / 2143768
22000 / 2143768
23000 / 2143768
24000 / 2143768
25000 / 2143768
26000 / 2143768
27000 / 2143768
28000 / 2143768
29000 / 2143768
30000 / 2143768
31000 / 2143768
32000 / 2143768
33000 / 2143768
34000 / 2143768
35000 / 2143768
36000 / 2143768
37000 / 2143768
38000 / 2143768
39000 / 2143768
40000 / 2143768
41000 / 2143768
42000 / 2143768
43000 / 2143768
44000 / 2143768
45000 / 2143768
46000 / 2143768
47000 / 2143768
48000 / 2143768
49000 / 2143768
50000 / 2143768
51000 / 2143768
52000 / 2143768
53000 / 2143768
54000 / 2143768
55000 / 2143768
56000 / 2143768
57000 / 2143768
58000 / 2143768
59000 / 2143768
60000 / 2143768
61000 / 2143768
62000 / 2143768
63000 / 2143768
6

493000 / 2143768
494000 / 2143768
495000 / 2143768
496000 / 2143768
497000 / 2143768
498000 / 2143768
499000 / 2143768
500000 / 2143768
501000 / 2143768
502000 / 2143768
503000 / 2143768
504000 / 2143768
505000 / 2143768
506000 / 2143768
507000 / 2143768
508000 / 2143768
509000 / 2143768
510000 / 2143768
511000 / 2143768
512000 / 2143768
513000 / 2143768
514000 / 2143768
515000 / 2143768
516000 / 2143768
517000 / 2143768
518000 / 2143768
519000 / 2143768
520000 / 2143768
521000 / 2143768
522000 / 2143768
523000 / 2143768
524000 / 2143768
525000 / 2143768
526000 / 2143768
527000 / 2143768
528000 / 2143768
529000 / 2143768
530000 / 2143768
531000 / 2143768
532000 / 2143768
533000 / 2143768
534000 / 2143768
535000 / 2143768
536000 / 2143768
537000 / 2143768
538000 / 2143768
539000 / 2143768
540000 / 2143768
541000 / 2143768
542000 / 2143768
543000 / 2143768
544000 / 2143768
545000 / 2143768
546000 / 2143768
547000 / 2143768
548000 / 2143768
549000 / 2143768
550000 / 2143768
551000 / 21437

975000 / 2143768
976000 / 2143768
977000 / 2143768
978000 / 2143768
979000 / 2143768
980000 / 2143768
981000 / 2143768
982000 / 2143768
983000 / 2143768
984000 / 2143768
985000 / 2143768
986000 / 2143768
987000 / 2143768
988000 / 2143768
989000 / 2143768
990000 / 2143768
991000 / 2143768
992000 / 2143768
993000 / 2143768
994000 / 2143768
995000 / 2143768
996000 / 2143768
997000 / 2143768
998000 / 2143768
999000 / 2143768
1000000 / 2143768
1001000 / 2143768
1002000 / 2143768
1003000 / 2143768
1004000 / 2143768
1005000 / 2143768
1006000 / 2143768
1007000 / 2143768
1008000 / 2143768
1009000 / 2143768
1010000 / 2143768
1011000 / 2143768
1012000 / 2143768
1013000 / 2143768
1014000 / 2143768
1015000 / 2143768
1016000 / 2143768
1017000 / 2143768
1018000 / 2143768
1019000 / 2143768
1020000 / 2143768
1021000 / 2143768
1022000 / 2143768
1023000 / 2143768
1024000 / 2143768
1025000 / 2143768
1026000 / 2143768
1027000 / 2143768
1028000 / 2143768
1029000 / 2143768
1030000 / 2143768
1031000 / 2143768

1443000 / 2143768
1444000 / 2143768
1445000 / 2143768
1446000 / 2143768
1447000 / 2143768
1448000 / 2143768
1449000 / 2143768
1450000 / 2143768
1451000 / 2143768
1452000 / 2143768
1453000 / 2143768
1454000 / 2143768
1455000 / 2143768
1456000 / 2143768
1457000 / 2143768
1458000 / 2143768
1459000 / 2143768
1460000 / 2143768
1461000 / 2143768
1462000 / 2143768
1463000 / 2143768
1464000 / 2143768
1465000 / 2143768
1466000 / 2143768
1467000 / 2143768
1468000 / 2143768
1469000 / 2143768
1470000 / 2143768
1471000 / 2143768
1472000 / 2143768
1473000 / 2143768
1474000 / 2143768
1475000 / 2143768
1476000 / 2143768
1477000 / 2143768
1478000 / 2143768
1479000 / 2143768
1480000 / 2143768
1481000 / 2143768
1482000 / 2143768
1483000 / 2143768
1484000 / 2143768
1485000 / 2143768
1486000 / 2143768
1487000 / 2143768
1488000 / 2143768
1489000 / 2143768
1490000 / 2143768
1491000 / 2143768
1492000 / 2143768
1493000 / 2143768
1494000 / 2143768
1495000 / 2143768
1496000 / 2143768
1497000 / 2143768
1498000 / 

1913000 / 2143768
1914000 / 2143768
1915000 / 2143768
1916000 / 2143768
1917000 / 2143768
1918000 / 2143768
1919000 / 2143768
1920000 / 2143768
1921000 / 2143768
1922000 / 2143768
1923000 / 2143768
1924000 / 2143768
1925000 / 2143768
1926000 / 2143768
1927000 / 2143768
1928000 / 2143768
1929000 / 2143768
1930000 / 2143768
1931000 / 2143768
1932000 / 2143768
1933000 / 2143768
1934000 / 2143768
1935000 / 2143768
1936000 / 2143768
1937000 / 2143768
1938000 / 2143768
1939000 / 2143768
1940000 / 2143768
1941000 / 2143768
1942000 / 2143768
1943000 / 2143768
1944000 / 2143768
1945000 / 2143768
1946000 / 2143768
1947000 / 2143768
1948000 / 2143768
1949000 / 2143768
1950000 / 2143768
1951000 / 2143768
1952000 / 2143768
1953000 / 2143768
1954000 / 2143768
1955000 / 2143768
1956000 / 2143768
1957000 / 2143768
1958000 / 2143768
1959000 / 2143768
1960000 / 2143768
1961000 / 2143768
1962000 / 2143768
1963000 / 2143768
1964000 / 2143768
1965000 / 2143768
1966000 / 2143768
1967000 / 2143768
1968000 / 

In [84]:
for key, value in final_dic.items():
    df = pd.DataFrame(value)
    df.to_csv(f'/Users/zungrulin/Desktop/whatever/Sub_Folder/{key}.csv', index=False)

In [60]:
df_all.to_csv('/Users/zungrulin/Desktop/whatever/Sub_Folder/all_country_list.csv')

In [7]:
mydata = te.getHistoricalRatings(country=['Afghanistan','Benin'], initDate='2005-01-01', output_type= 'df')

In [8]:
mydata

Unnamed: 0,Country,Date,Agency,Rating,Outlook
0,Benin,1/25/2012,Fitch,,
1,Benin,2/20/2012,S&P,B,Stable
2,Benin,11/1/2013,S&P,,
3,Benin,7/5/2018,S&P,B+,Stable
4,Benin,3/8/2019,Fitch,B,Positive
5,Benin,6/18/2019,Moody's,B2,Positive
6,Benin,4/9/2020,Fitch,B,Stable
7,Benin,2/11/2021,Fitch,B,Positive
8,Benin,3/9/2021,Moody's,B1,Stable
9,Benin,10/29/2021,Fitch,B+,Stable


In [54]:
to_be_removed = ['Montserrat', 'Lao', 'Tokelau', 'Curaçao', 'Guinea-Bissau', 'Réunion', 'Pitcairn', 'Saint Vincent and the Grenadines'] 

c_list = countries

ValueError: list.remove(x): x not in list

In [51]:
full_data.to_csv('/Users/zungrulin/Desktop/whatever/full_US_trading_data(2022).csv')

In [15]:
mydata2 = te.getHistoricalData(country=['Afghanistan','Benin'], indicator=['Unemployment Rate', 'Terrorism Index'], initDate='2005-01-01', output_type= 'df')



In [16]:
mydata2

Unnamed: 0,Country,Category,DateTime,Value,Frequency,HistoricalDataSymbol,LastUpdate
0,Afghanistan,Terrorism Index,2005-12-31T00:00:00,6.770,Yearly,AfghanistTerInd,2015-11-24T19:02:00
1,Afghanistan,Unemployment Rate,2005-12-31T00:00:00,8.500,Yearly,AFGSTANUNETRATE,2014-03-12T10:25:00
2,Benin,Terrorism Index,2005-12-31T00:00:00,0.000,Yearly,BeninTerInd,2015-11-24T19:02:00
3,Benin,Unemployment Rate,2005-12-31T00:00:00,0.900,Yearly,BENENINUNETRATE,2018-07-05T13:12:00
4,Afghanistan,Terrorism Index,2006-12-31T00:00:00,7.310,Yearly,AfghanistTerInd,2015-11-24T19:02:00
...,...,...,...,...,...,...,...
63,Benin,Unemployment Rate,2020-12-31T00:00:00,1.600,Yearly,BENENINUNETRATE,2022-05-09T21:20:00
64,Afghanistan,Terrorism Index,2021-12-31T00:00:00,9.109,Yearly,AfghanistTerInd,2022-09-19T14:11:00
65,Afghanistan,Unemployment Rate,2021-12-31T00:00:00,13.300,Yearly,AFGSTANUNETRATE,2022-07-20T15:04:00
66,Benin,Terrorism Index,2021-12-31T00:00:00,3.164,Yearly,BeninTerInd,2022-09-19T14:11:00


In [23]:
# mydata = te.getHistoricalRatings(country='afghanistan', initDate='2005-01-01', output_type= 'df')
# mydata2 = te.getHistoricalData(country=['Afghanistan'], initDate='2005-01-01', output_type= 'df')
# mydata3 = te.getHistoricalData(country=['Afghanistan'],indicator=['Trade'],output_type='df')
# mydata = te.getHistoricalData(country=countries_chunk, indicator=[indicator], initDate=initDate, output_type= 'df')
# mydata5=te.getHistorical('AFGCHNXX611')  
mydata5=te.getCmtHistorical('USAHKG31012',output_type='df') 


In [25]:
mydata5

Unnamed: 0,symbol,date,value
0,USAHKG31012,2022-12-31T00:00:00,141491.0
1,USAHKG31012,2021-12-31T00:00:00,101071.0
2,USAHKG31012,2020-12-31T00:00:00,121965.0
3,USAHKG31012,2019-12-31T00:00:00,111110.0
4,USAHKG31012,2018-12-31T00:00:00,128180.0
5,USAHKG31012,2017-12-31T00:00:00,184345.0
6,USAHKG31012,2016-12-31T00:00:00,496653.0
7,USAHKG31012,2015-12-31T00:00:00,258093.0
8,USAHKG31012,2014-12-31T00:00:00,78207.0
9,USAHKG31012,2013-12-31T00:00:00,91774.0


In [16]:
# df = pd.DataFrame(mydata5)
df.to_csv('/Users/zungrulin/Desktop/AFG_import_export_data.csv')

In [11]:
df = pd.DataFrame(mydata5)
df

Unnamed: 0,symbol,date,value
0,AFGCHNXX611,2019-12-31T00:00:00,8071352.0
1,AFGCHNXX611,2018-12-31T00:00:00,13699732.0
2,AFGCHNXX611,2017-12-31T00:00:00,9116821.0
3,AFGCHNXX611,2016-12-31T00:00:00,19840059.0
4,AFGCHNXX611,2015-12-31T00:00:00,14970717.0
5,AFGCHNXX611,2011-12-31T00:00:00,2244236.0
6,AFGCHNXX611,2010-12-31T00:00:00,6452551.0
7,AFGCHNXX611,2009-12-31T00:00:00,4483561.0
8,AFGCHNXX611,2008-12-31T00:00:00,5370903.0


In [21]:
df

Unnamed: 0,symbol,country1,country2,value,date,type,category,url,title,StartDate,lastupdate
0,USAGRC84793,United States,Greece,10645,2022,Re-export,Machines and Mechanical Appliances Having Indi...,/united-states/reexports/greece/machines-havin...,United States re-exports of machines and mecha...,1991,2023-06-16T09:56:00
1,USAGRC84792,United States,Greece,3936906,2022,Export,Machines and Mechanical Appliances Having Indi...,/united-states/exports/greece/machines-having-...,United States exports of machines and mechanic...,1991,2023-06-16T09:56:00
2,USAGRC84791,United States,Greece,1710677,2022,Import,Machines and Mechanical Appliances Having Indi...,/united-states/imports/greece/machines-having-...,United States Imports from Greece of Machines ...,1991,2023-06-16T09:56:00
3,USAGRC84722,United States,Greece,69889,2022,Export,Office machines not specified elsewhere,/united-states/exports/greece/office-machines-...,United States exports of office machines not s...,1991,2023-06-16T09:56:00
4,USAGRC84651,United States,Greece,3051,2022,Import,"Machine tools for working wood, cork, bone",/united-states/imports/greece/machine-tools-wo...,United States Imports from Greece of Machine t...,1992,2023-06-16T09:56:00
...,...,...,...,...,...,...,...,...,...,...,...
9995,USAVCT84073,United States,St Vincent and the Grenadines,2932,2022,Re-export,Spark-ignition Internal Combustion Piston Engines,/united-states/reexports/st-vincent-grenadines...,United States re-exports of spark-ignition int...,1991,2023-06-16T10:33:00
9996,USAVCT84072,United States,St Vincent and the Grenadines,119854,2022,Export,Spark-ignition Internal Combustion Piston Engines,/united-states/exports/st-vincent-grenadines/s...,United States exports of spark-ignition intern...,1991,2023-06-16T10:33:00
9997,USAVCT84062,United States,St Vincent and the Grenadines,3895,2022,Export,"Steam turbines and other vapor turbines, parts",/united-states/exports/st-vincent-grenadines/s...,United States exports of steam turbines and ot...,1993,2023-06-16T10:33:00
9998,USAVCT84022,United States,St Vincent and the Grenadines,13615,2022,Export,Steam or Other Vapour Generating Boilers,/united-states/exports/st-vincent-grenadines/s...,United States exports of steam or other vapour...,1994,2023-06-16T10:33:00


In [79]:
mydata4 = te.getHistoricalRatings(country= ['Mexico'], output_type='df')

In [20]:
# mydata4 = te.getCmtHistorical(symbol = 'AFGCHNXX611')
#not working: getCmtSnapshotByType, getCmtTotalByTypeAndMainCategory
# te.getCmtSnapshotByType(country ='Portugal',type='export',output_type='df')
df = te.getCmtCountry(country = 'United States',output_type = 'df')

In [143]:
list_label_import = []
for index, row in enumerate(df.loc):
    if df.loc[index,'type']=='Import':
#         print(row)
        list_label_import.append(df.iloc[index])
        

symbol                           AFGABW00001
country1                         Afghanistan
country2                               Aruba
type                                  Import
category                                None
url               /afghanistan/imports/aruba
title         Afghanistan imports from Aruba
lastupdate               2021-03-30T00:35:00
Name: 0, dtype: object
symbol                                              AFGABW17041
country1                                            Afghanistan
country2                                                  Aruba
type                                                     Import
category      Sugar Confectionery (Including White Chocolate...
url           /afghanistan/imports/aruba/sugar-confection-in...
title         Afghanistan imports of sugar confectionery (in...
lastupdate                                  2021-03-26T08:58:00
Name: 1, dtype: object
symbol                                              AFGABW04051
country1          

Name: 1041, dtype: object
symbol                                              AFGCHN33021
country1                                            Afghanistan
country2                                                  China
type                                                     Import
category                     Mixtures of Odoriferous Substances
url           /afghanistan/imports/china/odoriferous-mixture...
title         Afghanistan imports of mixtures of odoriferous...
lastupdate                                  2021-03-26T09:00:00
Name: 1042, dtype: object
symbol                                              AFGCHN53091
country1                                            Afghanistan
country2                                                  China
type                                                     Import
category                                  Woven fabrics of flax
url               /afghanistan/imports/china/woven-fabrics-flax
title         Afghanistan imports of woven fabrics o

Name: 1634, dtype: object
symbol                                              AFGDNK94041
country1                                            Afghanistan
country2                                                Denmark
type                                                     Import
category      Mattress Supports; Articles of Bedding and Sim...
url           /afghanistan/imports/denmark/matress-supports-...
title         Afghanistan imports of mattress supports; arti...
lastupdate                                  2021-03-26T09:00:00
Name: 1635, dtype: object
symbol                                              AFGDNK99991
country1                                            Afghanistan
country2                                                Denmark
type                                                     Import
category             Estimate Of Low Valued Import Transactions
url           /afghanistan/imports/denmark/commodities-not-s...
title         Afghanistan Imports from Denmark of Co

Name: 2308, dtype: object
symbol                                              AFGIND39241
country1                                            Afghanistan
country2                                                  India
type                                                     Import
category      Table Ware, Kitchenware, Other Household Artic...
url           /afghanistan/imports/india/tableware-household...
title         Afghanistan imports of table ware, kitchenware...
lastupdate                                  2021-03-26T09:22:00
Name: 2309, dtype: object
symbol                                              AFGIDNXX041
country1                                            Afghanistan
country2                                              Indonesia
type                                                     Import
category           Dairy products, eggs, honey, edible products
url           /afghanistan/imports/indonesia/dairy-products-...
title         Afghanistan Imports from Indonesia of 

Name: 2963, dtype: object
symbol                                              AFGIRN28111
country1                                            Afghanistan
country2                                                   Iran
type                                                     Import
category      Other Inorganic Acids, Other Inorganic Oxygen ...
url           /afghanistan/imports/iran/inorganic-acids-inor...
title         Afghanistan imports of other inorganic acids, ...
lastupdate                                  2021-03-26T09:23:00
Name: 2964, dtype: object
symbol                                              AFGIRN79071
country1                                            Afghanistan
country2                                                   Iran
type                                                     Import
category               Articles of zinc not specified elsewhere
url                     /afghanistan/imports/iran/articles-zinc
title         Afghanistan imports of articles of zin

Name: 3626, dtype: object
symbol                                              AFGKAZ23081
country1                                            Afghanistan
country2                                             Kazakhstan
type                                                     Import
category      Vegetable Materials, Waste, Residues of a Kind...
url           /afghanistan/imports/kazakhstan/vegetable-mate...
title         Afghanistan imports of vegetable materials, wa...
lastupdate                                  2021-03-26T09:23:00
Name: 3628, dtype: object
symbol                                              AFGKAZ27071
country1                                            Afghanistan
country2                                             Kazakhstan
type                                                     Import
category      Oils and Other Products of Distillation of Hig...
url           /afghanistan/imports/kazakhstan/oils-high-temp...
title         Afghanistan imports of oils and other 

Name: 4303, dtype: object
symbol                                              AFGPAK44201
country1                                            Afghanistan
country2                                               Pakistan
type                                                     Import
category      Wood Marquetry and Inlaid Wood, Caskets and Ca...
url           /afghanistan/imports/pakistan/wood-marquetry-j...
title         Afghanistan imports of wood marquetry and inla...
lastupdate                                  2021-03-26T09:25:00
Name: 4305, dtype: object
symbol                                              AFGPAK48021
country1                                            Afghanistan
country2                                               Pakistan
type                                                     Import
category      Paper, uncoat, for writing, rolls, handmade paper
url           /afghanistan/imports/pakistan/paper-uncoated-w...
title         Afghanistan imports of paper, uncoat, 

Name: 4980, dtype: object
symbol                                              AFGRUS33021
country1                                            Afghanistan
country2                                                 Russia
type                                                     Import
category                     Mixtures of Odoriferous Substances
url           /afghanistan/imports/russia/odoriferous-mixtur...
title         Afghanistan imports of mixtures of odoriferous...
lastupdate                                  2021-03-26T09:46:00
Name: 4982, dtype: object
symbol                                              AFGRUS33061
country1                                            Afghanistan
country2                                                 Russia
type                                                     Import
category                Preparations for Oral or Dental Hygiene
url           /afghanistan/imports/russia/preparations-oral-...
title         Afghanistan imports of preparations fo

Name: 5784, dtype: object
symbol                                              AFGTUR56021
country1                                            Afghanistan
country2                                                 Turkey
type                                                     Import
category      Felt, whether or not impregnated, coated, cove...
url           /afghanistan/imports/turkey/felt-impregnated-c...
title         Afghanistan imports of felt, whether or not im...
lastupdate                                  2021-05-31T10:23:00
Name: 5785, dtype: object
symbol                                              AFGTUR93061
country1                                            Afghanistan
country2                                                 Turkey
type                                                     Import
category      Bombs, Grenades, Torpedoes, Mines, Missiles an...
url           /afghanistan/imports/turkey/bombs-grenades-car...
title         Afghanistan imports of bombs, grenades

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [248]:
all_country_df = te.getCmtCountry(country = 'Afghanistan',output_type = 'df')
all_country_df = all_country_df [:10]


import_country_df = combined_df.loc[combined_df['type'] == 'Import']
import_country_df.reset_index(drop=True, inplace=True)
list_code_dict = []
for index in range(len(import_country_df)):
    code = import_country_df.loc[index,'symbol']
    code_dict = te.getCmtHistorical(symbol = code, output_type = 'dict')
    list_code_dict.append(code_dict)
    time.sleep(1)


df_merged = pd.DataFrame()
for code_dict in list_code_dict:
    code_df = pd.DataFrame(code_dict)
    df_temp = pd.merge(import_country_df, code_df, on='symbol')
    df_merged = pd.concat([df_merged, df_temp])

df_merged = df_merged.drop_duplicates().reset_index(drop=True)
    
    


    

TypeError: unhashable type: 'list'

In [50]:
# te.getCmtCountry(country='United States', after_value= '', output_type='df')

# help(te.getCmtCountry)

def fetch_all_data(country='United States', output_type='df'):
    page = 1
    all_data = []

    while True:
        batch_data = te.getCmtCountry(country=country, page_number=page, output_type=output_type)
        print(f'Done fetching: {page}')
        
        # Check if batch data is None or empty. If so, break the loop.
        if batch_data is None or batch_data.empty:
            break

        all_data.append(batch_data)
        page += 1

    # Combining all batches into a single dataframe
    full_data = pd.concat(all_data, ignore_index=True)
    return full_data

# Fetching all data for the United States
full_data = fetch_all_data(country='United States', output_type='df')
print(full_data)


Done fetching: 1
Done fetching: 2
Done fetching: 3
Done fetching: 4
Done fetching: 5
Done fetching: 6
Done fetching: 7
Done fetching: 8
Done fetching: 9
Done fetching: 10
Done fetching: 11
Done fetching: 12
Done fetching: 13
Done fetching: 14
Done fetching: 15
Done fetching: 16
Done fetching: 17
Done fetching: 18
Done fetching: 19
Done fetching: 20
Done fetching: 21
Done fetching: 22
Done fetching: 23
Done fetching: 24
Done fetching: 25
Done fetching: 26
Done fetching: 27
Done fetching: 28
Done fetching: 29
Done fetching: 30
Done fetching: 31
Done fetching: 32
Done fetching: 33
Done fetching: 34
Done fetching: 35
Done fetching: 36
Done fetching: 37
Done fetching: 38
Done fetching: 39
Done fetching: 40
             symbol       country1     country2      value  date       type  \
0       USANLD40023  United States  Netherlands     596973  2022  Re-export   
1       USANLD40022  United States  Netherlands  118351588  2022     Export   
2       USANLD40021  United States  Netherlands   39

In [48]:
full_data = 

NameError: name 'full_data' is not defined

In [19]:
all_country_df = te.getCmtCountry(country='United States', output_type='df')
all_country_df = all_country_df[:10]

import_country_df = combined_df.loc[combined_df['type'] == 'Import']
import_country_df.reset_index(drop=True, inplace=True)

list_code_dict = []
for index in range(len(import_country_df)):
    code = import_country_df.loc[index, 'symbol']
    code_dict = te.getCmtHistorical(symbol=code, output_type='dict')
    list_code_dict.append(code_dict)
    time.sleep(1)

df_merged = pd.DataFrame()
for code_dict in list_code_dict:
    code_df = pd.DataFrame(code_dict)
    df_temp = pd.merge(import_country_df, code_df, on='symbol')
    df_merged = pd.concat([df_merged, df_temp])

df_merged = df_merged.drop_duplicates().reset_index(drop=True)

# Convert date string to datetime format
df_merged['date'] = pd.to_datetime(df_merged['date'])

# Pivot the table
df_pivot = pd.pivot_table(df_merged, index=['country2', 'date'], columns='category', values='value')

# Reset index
df_pivot = df_pivot.reset_index()

# Rename columns
df_pivot.columns.name = None
df_pivot = df_pivot.rename(columns={'country2': 'country'})

# Reset index
df_pivot = df_pivot.reset_index(drop=True)

NameError: name 'combined_df' is not defined

NameError: name 'df_pivot' is not defined

In [260]:
df_merged

Unnamed: 0,symbol,country1,country2,type,category,url,title,lastupdate,date,value
0,AFGABW00001,Afghanistan,Aruba,Import,,/afghanistan/imports/aruba,Afghanistan imports from Aruba,2021-03-30T00:35:00,2017-12-31T00:00:00,203931.0
1,AFGABW00001,Afghanistan,Aruba,Import,,/afghanistan/imports/aruba,Afghanistan imports from Aruba,2021-03-30T00:35:00,2018-12-31T00:00:00,1062991.0
2,AFGABW00001,Afghanistan,Aruba,Import,,/afghanistan/imports/aruba,Afghanistan imports from Aruba,2021-03-30T00:35:00,2019-12-31T00:00:00,483928.0
3,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2008-12-31T00:00:00,1196397.0
4,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2009-12-31T00:00:00,27013242.0
5,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2010-12-31T00:00:00,54214505.0
6,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2011-12-31T00:00:00,100371116.0
7,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2015-12-31T00:00:00,167040498.0
8,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2016-12-31T00:00:00,92105430.0
9,AFGARE99991,Afghanistan,United Arab Emirates,Import,Estimate Of Low Valued Import Transactions,/afghanistan/imports/united-arab-emirates/comm...,Afghanistan Imports from United Arab Emirates ...,2021-05-31T10:23:00,2017-12-31T00:00:00,26593708.0


In [None]:
  
    df_temp = pd.merge(all_country_df, code_df, on='symbol')
    df_temp = df_temp.drop('symbol', axis=1)
    df_merged = pd.concat([df_merged, df_temp])

df_merged = df_merged.drop_duplicates().reset_index(drop=True)
   
df_merged['date'] = pd.to_datetime(df_merged['date'])

df_pivot = pd.pivot_table(df_merged, index=['country2', 'date'], columns='category', values='value')

df_pivot = df_pivot.sort_index(level=[0, 1], sort_remaining=False)

df_pivot = df_pivot.reset_index()  

In [246]:
df_merged

In [244]:
te.getCmtHistorical(symbol = code, output_type = 'dict')

[{'symbol': 'AFGAREXX181', 'date': '2008-12-31T00:00:00', 'value': 1497298.0},
 {'symbol': 'AFGAREXX181', 'date': '2009-12-31T00:00:00', 'value': 903253.0},
 {'symbol': 'AFGAREXX181', 'date': '2010-12-31T00:00:00', 'value': 1556295.0},
 {'symbol': 'AFGAREXX181', 'date': '2011-12-31T00:00:00', 'value': 3699460.0},
 {'symbol': 'AFGAREXX181', 'date': '2015-12-31T00:00:00', 'value': 3021576.0},
 {'symbol': 'AFGAREXX181', 'date': '2016-12-31T00:00:00', 'value': 20912942.0},
 {'symbol': 'AFGAREXX181', 'date': '2017-12-31T00:00:00', 'value': 560443.0},
 {'symbol': 'AFGAREXX181', 'date': '2018-12-31T00:00:00', 'value': 328459.0},
 {'symbol': 'AFGAREXX181', 'date': '2019-12-31T00:00:00', 'value': 7339337.0}]

In [5]:
# Get comment data for Afghanistan
all_country_df = te.getCmtCountry(country='Afghanistan', output_type='df')
all_country_df = all_country_df[:20]

# Create an empty dataframe to store merged data
df_merged = pd.DataFrame()

# Filter out import data from a combined dataframe and get a list of unique codes
import_country_df = combined_df.loc[combined_df['type'] == 'Import']
import_country_df.reset_index(drop=True, inplace=True)
list_code_df = []
for index in range(len(import_country_df)):
    code = import_country_df.loc[index, 'symbol']
    mydata = te.getCmtHistorical(symbol=code, output_type='df')
    list_code_df.append(tuple(mydata.itertuples(index=False)))
    time.sleep(1)

# Merge comment data with historical data for each unique code
for code_df in list(set(list_code_df)):
    df_temp = pd.merge(all_country_df, pd.DataFrame(code_df), left_on='symbol', right_on='symbol')
    df_temp = df_temp.drop('symbol', axis=1)
    df_merged = pd.concat([df_merged, df_temp])

# Remove any duplicate rows
df_merged = df_merged.drop_duplicates().reset_index(drop=True)

# Convert the date column to datetime format
df_merged['date'] = pd.to_datetime(df_merged['date'])

# Pivot the data by country, date, and category
df_pivot = pd.pivot_table(df_merged, index=['country2', 'date'], columns='category', values='value')

# Sort the index by country and date
df_pivot = df_pivot.sort_index(level=[0, 1], sort_remaining=False)

# Reset the index and flatten the multi-index column names
df_pivot = df_pivot.reset_index()

LoginError: You need to do login before making any request

In [203]:
df_merged = pd.DataFrame()
for code_df in list_code_df:
    df_temp = pd.merge(all_country_df, code_df, left_on='symbol', right_on='symbol')
    df_temp = df_temp.drop('symbol', axis=1)
    df_merged = pd.concat([df_merged, df_temp])

df_merged = df_merged.drop_duplicates().reset_index(drop=True)

In [222]:
df_merged['date'] = pd.to_datetime(df_merged['date'])
# Pivot the data by country, date, and category
# df_pivot = pd.pivot_table(df_merged, index=['country2', 'date'], columns='category', values='value')

# # Sort the index by country and date
# df_pivot = df_pivot.sort_index(level=[0, 1], sort_remaining=False)

# # Reset the index and flatten the multi-index column names
# df_pivot = df_pivot.reset_index()

df_merged

Unnamed: 0,country1,country2,type,category,url,title,lastupdate,date,value
0,Afghanistan,Aruba,Import,"Yeasts, Other Single-cell Micro-organisms, Pre...",/afghanistan/imports/aruba/yeasts-dead-single-...,"Afghanistan imports of yeasts, other single-ce...",2021-03-26T08:58:00,2019-12-31,602.0
1,Afghanistan,Aruba,Import,Butter and other fats and oils derived from milk,/afghanistan/imports/aruba/butter-fats-oils-de...,Afghanistan imports of butter and other fats a...,2021-03-26T08:58:00,2019-12-31,2643.0
2,Afghanistan,Aruba,Import,Electrical Apparatus for Line Telephony or Lin...,/afghanistan/imports/aruba/electric-apparatus-...,Afghanistan imports of electrical apparatus fo...,2021-03-26T08:58:00,2019-12-31,3341.0
3,Afghanistan,Aruba,Import,Cheese and curd,/afghanistan/imports/aruba/cheese-curd,Afghanistan imports of cheese and curd from Aruba,2021-03-26T08:58:00,2018-12-31,34429.0
4,Afghanistan,Aruba,Import,Cheese and curd,/afghanistan/imports/aruba/cheese-curd,Afghanistan imports of cheese and curd from Aruba,2021-03-26T08:58:00,2019-12-31,48302.0
5,Afghanistan,Aruba,Import,Sugar Confectionery (Including White Chocolate...,/afghanistan/imports/aruba/sugar-confection-in...,Afghanistan imports of sugar confectionery (in...,2021-03-26T08:58:00,2019-12-31,1430.0
6,Afghanistan,Aruba,Import,Electric motors and generators (no sets),/afghanistan/imports/aruba/electric-motors-gen...,Afghanistan imports of electric motors and gen...,2021-03-26T08:58:00,2019-12-31,7980.0
7,Afghanistan,Aruba,Import,"Dish Washing Machines, Machinery for Cleaning",/afghanistan/imports/aruba/machines-dishwash-c...,"Afghanistan imports of dish washing machines, ...",2021-03-26T08:58:00,2018-12-31,15175.0
8,Afghanistan,Aruba,Import,"Dish Washing Machines, Machinery for Cleaning",/afghanistan/imports/aruba/machines-dishwash-c...,"Afghanistan imports of dish washing machines, ...",2021-03-26T08:58:00,2019-12-31,19283.0
9,Afghanistan,Aruba,Import,,/afghanistan/imports/aruba,Afghanistan imports from Aruba,2021-03-30T00:35:00,2017-12-31,203931.0


In [208]:
# df_merged = df_merged.reset_index(drop=True)
df_merged['date'] = pd.to_datetime(df_merged['date'])

# pivot the data
df_pivot = pd.pivot_table(df_merged, index=['country2', 'date'], columns='category', values='value')

# sort the index by country and date
df_pivot = df_pivot.sort_index(level=[0, 1], sort_remaining=False)

# reset the index and flatten the multi-index column names
df_pivot = df_pivot.reset_index()
# df_pivot.columns = [f'{i}{j}' if j != '' else f'{i}' for i, j in df_pivot.columns]


In [209]:
df_pivot

category,country2,date,"Animal, vegetable fats and oils, cleavage products",Butter and other fats and oils derived from milk,Cheese and curd,Chocolate and other food products containing cocoa,Cocoa and cocoa preparations,"Coffee, tea, mate and spices","Dish Washing Machines, Machinery for Cleaning",Electric motors and generators (no sets),Electrical Apparatus for Line Telephony or Line Telegraphy,Estimate Of Low Valued Import Transactions,"Fish, crustaceans, molluscs, aquatics invertebrates","Oil seed, oleagic fruits, grain, seed, fruits","Sauces and Preparations Therefor, Mixed Condiments, Mustard Flour Meal","Sugar Confectionery (Including White Chocolate), Not Containing Cocoa","Yeasts, Other Single-cell Micro-organisms, Prepared Baking Powders"
0,Aruba,2018-12-31,,,34429.0,6972.0,,,15175.0,,,,,,,,
1,Aruba,2019-12-31,,2643.0,48302.0,3930.0,,,19283.0,7980.0,3341.0,,,,7338.0,1430.0,602.0
2,United Arab Emirates,2008-12-31,19087329.0,,,,1497298.0,564259.0,,,,1196397.0,,,,,
3,United Arab Emirates,2009-12-31,588494.0,,,,903253.0,107351.0,,,,27013242.0,,,,,
4,United Arab Emirates,2010-12-31,7758939.0,,,,1556295.0,553449.0,,,,54214505.0,,,,,
5,United Arab Emirates,2011-12-31,21948646.0,,,,3699460.0,242928.0,,,,100371116.0,,,,,
6,United Arab Emirates,2015-12-31,8909939.0,,,,3021576.0,1955230.0,,,,167040498.0,,,,,
7,United Arab Emirates,2016-12-31,11816742.0,,,,20912942.0,1032821.0,,,,92105430.0,,,,,
8,United Arab Emirates,2017-12-31,14902312.0,,,,560443.0,1794834.0,,,,26593708.0,7930.0,,,,
9,United Arab Emirates,2018-12-31,7630211.0,,,,328459.0,1310045.0,,,,13563010.0,27824.0,31766.0,,,


In [163]:
    df1 = pd.DataFrame({'a1': ['x', 'y', 'z'], 'b1': [1, 2, 3], 'c1': [4, 5, 6], 'd1': [7, 8, 9]})
    df2 = pd.DataFrame({'a2': ['x', 'x', 'y', 'y', 'z', 'z'], 'b2': [10, 11, 12, 13, 14, 15], 'c2': [16, 17, 18, 19, 20, 21]})

    # merge the dataframes based on column a1 and column a2
    df_merged = pd.merge(df1, df2, left_on='a1', right_on='a2')

    # drop the duplicate column a2
    df_merged = df_merged.drop('a2', axis=1)

In [110]:
te.getCmtUpdates(output_type = 'df')

Unnamed: 0,symbol,country1,country2,type,category,url,title,lastupdate
0,AZEBGR84431,Azerbaijan,Bulgaria,Import,Printing Machinery; Machines for Uses Ancillar...,/azerbaijan/imports/bulgaria/printing-machiner...,Azerbaijan Imports from Bulgaria of Printing M...,2023-04-24T22:59:00
1,AZEBGR85101,Azerbaijan,Bulgaria,Import,"Shavers and Hair Clippers, With Self-contained...",/azerbaijan/imports/bulgaria/electric-shavers-...,Azerbaijan imports of shavers and hair clipper...,2023-04-24T22:59:00
2,AZEBGR85171,Azerbaijan,Bulgaria,Import,Electrical Apparatus for Line Telephony or Lin...,/azerbaijan/imports/bulgaria/electric-apparatu...,Azerbaijan Imports from Bulgaria of Electrical...,2023-04-24T22:59:00
3,AZEBIH85101,Azerbaijan,Bosnia And Herzegovina,Import,"Shavers and Hair Clippers, With Self-contained...",/azerbaijan/imports/bosnia-herzegovina/electri...,Azerbaijan imports of shavers and hair clipper...,2023-04-24T22:59:00
4,AZEBRA29051,Azerbaijan,Brazil,Import,"Acyclic Alcohols, Their Halogenated, Sulfonate...",/azerbaijan/imports/brazil/acyclic-alcoholshal...,"Azerbaijan imports of acyclic alcohols, their ...",2023-04-24T22:59:00
...,...,...,...,...,...,...,...,...
9995,HUNTZAXX382,Hungary,Tanzania,Export,Miscellaneous chemical products,/hungary/exports/tanzania/miscellaneous-chemic...,Hungary exports of miscellaneous chemical prod...,2023-04-04T14:51:00
9996,HUNTZAXX392,Hungary,Tanzania,Export,Plastics,/hungary/exports/tanzania/plastics,Hungary exports of plastics to Tanzania,2023-04-04T14:51:00
9997,HUNTZAXX402,Hungary,Tanzania,Export,Rubbers,/hungary/exports/tanzania/rubbers,Hungary exports of rubbers to Tanzania,2023-04-04T14:51:00
9998,HUNTZAXX411,Hungary,Tanzania,Import,Raw hides and skins (other than furskins) and ...,/hungary/imports/tanzania/raw-hides-skins-than...,Hungary Imports from Tanzania of Raw hides and...,2023-04-04T14:51:00
