# Real Time Currency Conversion Branching Classification 

Costa Rica

Belinda Brown, belindabrownr04@gmail.com

[![GitHub](https://badgen.net/badge/icon/github?icon=github&label)](https://github.com) [brown9804](https://github.com/brown9804)

Jan, 2021

## Import packages 

In [None]:
!pip install prettytable
!pip install tkinter
!pip install requests

In [2]:
import os
import glob
import logging
import sys
import pandas as pd
import numpy as np
import openpyxl
from tqdm import tqdm
from datetime import date
import matplotlib.pyplot as plt  
import sklearn
from sklearn import preprocessing
from sklearn import metrics
from azureml.core import Workspace, Dataset
import time
import prettytable
import requests
from tkinter import *
import tkinter as tk
from tkinter import ttk

## Functions Definitions

In [3]:
def xcolumnlookup(lookup_value, lookup_array, return_array, if_not_found:str = ''):
    match_value = return_array.loc[lookup_array == lookup_value]
    if match_value.empty:
        return "Not Found" if if_not_found == '' else if_not_found

    else:
        return match_value.tolist()[0]

In [4]:
def filter_unique(dataframe, column, newcolumn):
    count_unique = 0
    output_df = pd.DataFrame()
    dataframe = dataframe[column]
    print("Total of items: ", len(dataframe))
    # using set()
    # to remove duplicated 
    # from list 
    output_df = list(set(dataframe))
    count_unique = len(output_df)
    print("Unique items: ", count_unique)
    unique_column = pd.Series(output_df,name=newcolumn)
    return output_df, count_unique, unique_column

In [5]:
def snapshot_by_equalcondition(source_df, column_name, condition):
    snapshot_df = source_df.loc[source_df[column_name] == condition]
    return snapshot_df

In [6]:
def mapping_data(dataframe):
    head = pd.DataFrame(dataframe.iloc[:3])
    display(head)
    
    output = {}
    output['Column Names'] = dataframe.columns.values.tolist()
    pd.set_option('display.max_colwidth', -1)
    column_names = pd.DataFrame(data=output)
    display(column_names)
    
    output = {}
    output['Data Types'] = dataframe.dtypes
    pd.set_option('display.max_colwidth', -1) 
    column_dtypes = pd.DataFrame(data=output)
    display(column_dtypes)
    
    output = {}
    output['Null Values'] = dataframe.isna().sum()  # #  Is null # this is to validate the amount of rows within a columns that carry information
    pd.set_option('display.max_colwidth', -1) 
    null_values = pd.DataFrame(data=output)
    display(null_values)

In [7]:
class RealTimeCurrencyConverter():
    def __init__(self,url):
            self.data = requests.get(url).json()
            self.currencies = self.data['rates']

    def convert(self, from_currency, to_currency, amount): 
        initial_amount = amount 
        if from_currency != 'USD' : 
            amount = amount / self.currencies[from_currency] 
  
        # limiting the precision to 4 decimal places 
        amount = round(amount * self.currencies[to_currency], 4) 
        return amount

In [8]:
def reading_csv_data(filename):
    source_df = pd.read_csv(filename, sep=',', encoding='utf-8', engine='python',error_bad_lines=False) 
    return source_df

## Reading source data 

In [9]:
source_df = reading_csv_data("./objects_data_small_slide.csv")

## Mapping 

In [10]:
mapping_data(source_df)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22 400.29,JPY,13-bath-red_Bathroom
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1 360.13,JPY,1233-liv-cup_Livingroom
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1 126.44,JPY,13-bath-red_Bathroom


Unnamed: 0,Column Names
0,Object ID
1,Object Type
2,Object Key
3,Creation Date
4,Delivery date
5,Lead Time
6,Amount
7,Currency
8,Comments


Unnamed: 0,Data Types
Object ID,int64
Object Type,object
Object Key,object
Creation Date,object
Delivery date,object
Lead Time,object
Amount,object
Currency,object
Comments,object


Unnamed: 0,Null Values
Object ID,0
Object Type,0
Object Key,0
Creation Date,0
Delivery date,0
Lead Time,0
Amount,0
Currency,0
Comments,0


In [11]:
source_df["Amount"] = [str(i).replace(" ", "") for i in source_df["Amount"]]

In [12]:
source_df.head(4)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom
3,1086683671,Bathroom,13-bath-red,9/30/2021,4/29/2023,576,164.97,JPY,13-bath-red_Bathroom


In [14]:
source_df = source_df.astype({'Amount':'float64'})

In [15]:
source_df.shape

(9395, 9)

### **Cleaning up**

In [16]:
source_df = source_df.drop_duplicates(subset="Object ID")

In [18]:
source_df.shape

(7641, 9)

## Exchange rate

In [19]:
source_df.head(4)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom
3,1086683671,Bathroom,13-bath-red,9/30/2021,4/29/2023,576,164.97,JPY,13-bath-red_Bathroom


In [20]:
currency_change_df = source_df.copy()

In [21]:
currency_change_df.head(4)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom
3,1086683671,Bathroom,13-bath-red,9/30/2021,4/29/2023,576,164.97,JPY,13-bath-red_Bathroom


In [22]:
url = 'https://api.exchangerate-api.com/v4/latest/USD'
converter = RealTimeCurrencyConverter(url)

In [23]:
currency_change_df['std_US_amount'] = currency_change_df.apply(lambda row : converter.convert(row['Currency'], "USD", row['Amount']), axis = 1)

In [24]:
currency_change_df.head(5)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments,std_US_amount
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom,191.2593
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom,11.6131
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom,9.6178
3,1086683671,Bathroom,13-bath-red,9/30/2021,4/29/2023,576,164.97,JPY,13-bath-red_Bathroom,1.4086
4,994496816,Livingroom,1233-liv-cup,12/10/2021,1/24/2022,45,16160.0,COP,1233-liv-cup_Livingroom,4.2315


## Snapshot for the analysis

In [25]:
std_curr = currency_change_df.copy()

In [26]:
std_curr.head(4)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments,std_US_amount
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom,191.2593
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom,11.6131
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom,9.6178
3,1086683671,Bathroom,13-bath-red,9/30/2021,4/29/2023,576,164.97,JPY,13-bath-red_Bathroom,1.4086


### **Branch # 1**

In [27]:
std_curr['std US 0-60'] = std_curr['std_US_amount'].between(0, 60, inclusive=True)

### **Branch # 2**

In [28]:
std_curr['std US 61-300'] = std_curr['std_US_amount'].between(61, 300, inclusive=True)

### **Branch # 3**

In [29]:
std_curr['std US 301-600'] = std_curr['std_US_amount'].between(301, 600, inclusive=True)

### **Extract snapshots**

In [30]:
std_curr.head(5)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments,std_US_amount,std US 0-60,std US 61-300,std US 301-600
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom,191.2593,False,True,False
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom,11.6131,True,False,False
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom,9.6178,True,False,False
3,1086683671,Bathroom,13-bath-red,9/30/2021,4/29/2023,576,164.97,JPY,13-bath-red_Bathroom,1.4086,True,False,False
4,994496816,Livingroom,1233-liv-cup,12/10/2021,1/24/2022,45,16160.0,COP,1233-liv-cup_Livingroom,4.2315,True,False,False


In [31]:
std_1 = snapshot_by_equalcondition(std_curr, "std US 0-60", True)
std_2 = snapshot_by_equalcondition(std_curr, "std US 61-300", True)
std_3 = snapshot_by_equalcondition(std_curr, "std US 301-600", True)

In [32]:
output = {}
output['Description'] = 'Shapes'
output['Input Data'] = str(source_df.shape)
output['Split #1'] = str( std_1.shape)
output['Split #2'] = str( std_2.shape)
output['Split #3'] = str( std_3.shape)
pd.set_option('display.max_colwidth', -1) 
pd.DataFrame(data=output, index=['']).T

Unnamed: 0,Unnamed: 1
Description,Shapes
Input Data,"(7641, 9)"
Split #1,"(2380, 13)"
Split #2,"(1153, 13)"
Split #3,"(692, 13)"


### **Unique Items**

In [33]:
UniqueItems, len_UniqueItems, x0 = filter_unique(source_df, 'Object ID', 'Unique Items')

Total of items:  7641
Unique items:  7641


In [34]:
std_curr.head(3)

Unnamed: 0,Object ID,Object Type,Object Key,Creation Date,Delivery date,Lead Time,Amount,Currency,Comments,std_US_amount,std US 0-60,std US 61-300,std US 301-600
0,1000160868,Bathroom,13-bath-red,9/6/2021,12/9/2021,94,22400.29,JPY,13-bath-red_Bathroom,191.2593,False,True,False
1,1086686178,Livingroom,1233-liv-cup,9/30/2021,11/20/2021,51,1360.13,JPY,1233-liv-cup_Livingroom,11.6131,True,False,False
2,1086686191,Bathroom,13-bath-red,9/30/2021,11/24/2021,55,1126.44,JPY,13-bath-red_Bathroom,9.6178,True,False,False


Can be used with group by:

In [None]:
gb.agg        gb.boxplot    gb.cummin     gb.describe   gb.filter     gb.get_group  gb.height     gb.last       gb.median     gb.ngroups    gb.plot       gb.rank       gb.std        gb.transform
gb.aggregate  gb.count      gb.cumprod    gb.dtype      gb.first      gb.groups     gb.hist       gb.max        gb.min        gb.nth        gb.prod       gb.resample   gb.sum        gb.var
gb.apply      gb.cummax     gb.cumsum     gb.fillna     gb.gender     gb.head       gb.indices    gb.mean       gb.name       gb.ohlc       gb.quantile   gb.size       gb.tail       gb.weight

In [35]:
std_curr.dtypes

Object ID         int64  
Object Type       object 
Object Key        object 
Creation Date     object 
Delivery date     object 
Lead Time         object 
Amount            float64
Currency          object 
Comments          object 
std_US_amount     float64
std US 0-60       bool   
std US 61-300     bool   
std US 301-600    bool   
dtype: object

In [36]:
std_curr["Lead Time"] = [str(i).replace(" ", "") for i in std_curr["Lead Time"]]

In [37]:
std_curr = std_curr.astype({'Lead Time':'int64'})

In [38]:
std_curr_with_median = std_curr.groupby(['Object ID','Object Type']).agg(arg={'Lead Time':[('_mean','mean')],'Amount':[('','median')]}).reset_index()

In [39]:
std_curr_with_max = std_curr.groupby(['Object ID','Object Type']).agg(arg={'Lead Time':[('_mean','mean')],'Amount':[('','max')]}).reset_index()

In [40]:
std_curr_with_median.head(3)

Unnamed: 0_level_0,Object ID,Object Type,Lead Time,Amount
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,_mean,Unnamed: 4_level_1
0,1901968,Kitchen,815,7389.12
1,1966091,Bedroom,571,23861.69
2,1966478,Bedroom,5761,10764.38


In [41]:
std_curr_with_max.head(3)

Unnamed: 0_level_0,Object ID,Object Type,Lead Time,Amount
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,_mean,Unnamed: 4_level_1
0,1901968,Kitchen,815,7389.12
1,1966091,Bedroom,571,23861.69
2,1966478,Bedroom,5761,10764.38


In [None]:
# Export to CSV
source_df.to_csv('./Exporting_DataFrame.csv', index=False)    

## References

[1] From https://stackoverflow.com/questions/8420143/valueerror-could-not-convert-string-to-float-id <br/>
[2] From https://www.geeksforgeeks.org/apply-function-to-every-row-in-a-pandas-dataframe/ <br/>
[3] From https://data-flair.training/blogs/currency-converter-python/ <br/>

# END