In [48]:
# Import dataset of variables for Japan into Python
import pandas as pd

# Allow the python to access the downloads file to access the relevant data for Japan
japan_file = '/Users/umikavekaria/Documents/GitHub/EC1B1-Group-42/International_Financial_Statistics_Japan.xlsx'

# Allow the python to access the downloads file to access the relevant data for US
us_file = '/Users/umikavekaria/Documents/GitHub/EC1B1-Group-42/International_Financial_Statistics_US.xlsx'

# Load these files into a Panda DataFrame
japan_df = pd.read_excel(japan_file)
us_df = pd.read_excel(us_file)

# Merge the two datasets together in Python
merged_df = pd.concat([japan_df, us_df], axis=1)

# Create a new DataFrame by copying the data from the merged_df
df = pd.DataFrame(merged_df)



# Assign each column to a variable, except exchange rates (deal with this seperately)
# Convert each column to numeric, handling errors by coercing non-numeric values to NaN
industrial_production_japan = pd.to_numeric(df.iloc[:, 1], errors='coerce')
international_reserves_japan = pd.to_numeric(df.iloc[:, 3], errors='coerce')
cpi_japan = pd.to_numeric(df.iloc[:, 4], errors='coerce')

international_reserves_usa = pd.to_numeric(df.iloc[:, 6], errors='coerce')
cpi_usa = pd.to_numeric(df.iloc[:, 7], errors='coerce')

# Convert the exchange rates column to numeric, handling errors by coercing non-numeric values to NaN
exchange_rates = pd.to_numeric(df.iloc[:, 2], errors='coerce')

# Fix the exchange_rates_japan column to become from US Dollar per Domestic Currency to Domestic Currency per US Dollar by doing (1 / exchange rate (USD/JPY))
exchange_rates_japan = 1 / exchange_rates



# Construct variables for Japan 
# Calculate the monthly growth in the nominal exchange rates for Japan (((nominal exchange rate this month - nominal exchange rate last month) / nominal exchange rate last month) * 100)
monthly_growth_exchange_rates_nominal = exchange_rates_japan.pct_change() * 100

# Calculate the real exchange rate for JPY/USD by doing ((nominal exchange rate in Japan * CPI in Japan) / (CPI in USA))
real_exchange_rate_japan = (exchange_rates_japan * cpi_japan) / (cpi_usa)

# Calculate the monthly growth in real exchange rate for JPY/USD by doing (((real exchange rate this month - real exchange rate last month) / real exchange rate last month) * 100)
monthly_growth_exchange_rates_real = real_exchange_rate_japan.pct_change() * 100

# Calculate the monthly inflation rate by doing (((inflation rate this month - inflation rate last month) / inflation rate last month) * 100)
monthly_inflation_rate_japan = cpi_japan.pct_change() * 100

# Calculate the monthly growth in industrial production
monthly_growth_industrial_production = industrial_production_japan.pct_change() * 100

# Calculate the growth in industrial production versus 12 months ago
    ## Extract the value of industrial production from 12 months ago
industrial_production_japan_12_months_ago = industrial_production_japan.shift(12)

    ## Calculate growth in industrial production by doing (((industrial production this month - industrial production 12 months ago) / industrial production 12 months ago) * 100)
growth_in_industrial_production = ((industrial_production_japan - industrial_production_japan_12_months_ago) / industrial_production_japan_12_months_ago) * 100

# Calculate an index of the value of international reserves (base month = 100 (January 1960))
    ## Extract the value of international reserves during January 1960
reserves_january_1960_japan = international_reserves_japan[1]

    ## Calculate the index of international reserves by doing (((international reserves this month) / international reserves in January 1960) * 100)
international_reserves_japan_index = (international_reserves_japan / reserves_january_1960_japan) * 100



# Construct variables for USA
# Calculate the monthly inflation rate by doing (((inflation rate this month - inflation rate last month) / inflation rate last month) * 100)
monthly_inflation_rate_usa = cpi_usa.pct_change() * 100

# Calculate an index of the value of international reserves (base month = 100 (January 1960))
    ## Extract the value of international reserves during January 1960
reserves_january_1960_usa = international_reserves_usa[1]

    ## Calculate the index of international reserves by doing (((international reserves this month) / international reserves in January 1960) * 100)
international_reserves_usa_index = (international_reserves_usa / reserves_january_1960_usa) * 100




# Identify the outliers in each data series using the Interquartile Range (IQR) method
# Calculate Q1 (first quartile) and Q3 (third quartile) for industrial production in Japan
industrial_production_japan_Q1  = industrial_production_japan.quantile(0.25)
industrial_production_japan_Q3  = industrial_production_japan.quantile(0.75)

# Calculate Interquartile Range (IQR) for industrial production in Japan
industrial_production_IQR = industrial_production_japan_Q3 - industrial_production_japan_Q1

# Define the threshold for outliers for industrial production in Japan
threshold_low_ip_japan = industrial_production_japan_Q1 - (1.5 * industrial_production_IQR)
threshold_high_ip_japan = industrial_production_japan_Q3 + (1.5 * industrial_production_IQR)

# Extract the outliers for industrial production in Japan
industrial_production_japan_outliers = (industrial_production_japan < threshold_low_ip_japan) | (industrial_production_japan > threshold_high_ip_japan)
industrial_production_japan_outliers_values = industrial_production_japan[industrial_production_japan_outliers]



# Calculate Q1 (first quartile) and Q3 (third quartile) for international reserves in Japan
international_reserves_japan_Q1 = international_reserves_japan.quantile(0.25)
international_reserves_japan_Q3 = international_reserves_japan.quantile(0.75)

# Calculate Interquartile Range (IQR) for international reserves in Japan
international_reserves_IQR = international_reserves_japan_Q3 - international_reserves_japan_Q1

# Define the threshold for outliers for international reserves in Japan
threshold_low_ir_japan = international_reserves_japan_Q1 - (1.5 * international_reserves_IQR)
threshold_high_ir_japan = international_reserves_japan_Q3 + (1.5 * international_reserves_IQR)

# Extract the outliers for international reserves in Japan
international_reserves_japan_outliers = (international_reserves_japan < threshold_low_ir_japan) | (international_reserves_japan > threshold_high_ir_japan)
international_reserves_japan_outliers_values = international_reserves_japan[international_reserves_japan_outliers]



# Calculate Q1 (first quartile) and Q3 (third quartile) for Consumer Price Index in Japan
cpi_japan_Q1 = cpi_japan.quantile(0.25)
cpi_japan_Q3 = cpi_japan.quantile(0.75)

# Calculate Interquartile Range (IQR) for Consumer Price Index in Japan
cpi_IQR = cpi_japan_Q3 - cpi_japan_Q1

# Define the threshold for outliers for Consumer Price Index in Japan
threshold_low_cpi_japan = cpi_japan_Q1 - (1.5 * cpi_IQR)
threshold_high_cpi_japan = cpi_japan_Q3 + (1.5 * cpi_IQR)

# Extract the outliers for Consumer Price Index in Japan
cpi_japan_outliers = (cpi_japan < threshold_low_cpi_japan) | (cpi_japan > threshold_high_cpi_japan)
cpi_japan_outliers_values = cpi_japan[cpi_japan_outliers]



# Calculate Q1 (first quartile) and Q3 (third quartile) for exchange rates (JPY/USD)
exchange_rates_japan_Q1 = exchange_rates_japan.quantile(0.25)
exchange_rates_japan_Q3 = exchange_rates_japan.quantile(0.75)

# Calculate Interquartile Range (IQR) for exchange rates (JPY/USD)
exchange_rates_IQR = exchange_rates_japan_Q3 - exchange_rates_japan_Q1

# Define the threshold for outliers for exchange rates (JPY/USD)
threshold_low_er_japan = exchange_rates_japan_Q1 - (1.5 * exchange_rates_IQR)
threshold_high_er_japan = exchange_rates_japan_Q3 + (1.5 * exchange_rates_IQR)

# Extract the outliers for exchange rates (JPY/USD)
exchange_rates_japan_outliers = (exchange_rates_japan < threshold_low_er_japan) | (exchange_rates_japan > threshold_high_er_japan)
exchange_rates_japan_outliers_values = exchange_rates_japan[exchange_rates_japan_outliers]



# Calculate Q1 (first quartile) and Q3 (third quartile) for international reserves in USA
international_reserves_usa_Q1 = international_reserves_usa.quantile(0.25)
international_reserves_usa_Q3 = international_reserves_usa.quantile(0.75)

# Calculate Interquartile Range (IQR) for international reserves in USA
international_reserves_usa_IQR = international_reserves_usa_Q3 - international_reserves_usa_Q1

# Define the threshold for outliers for international reserves in USA
threshold_low_ir_usa = international_reserves_usa_Q1 - (1.5 * international_reserves_usa_IQR)
threshold_high_ir_usa = international_reserves_usa_Q3 + (1.5 * international_reserves_usa_IQR)

# Extract the outliers for international reserves in USA
international_reserves_usa_outliers = (international_reserves_usa < threshold_low_ir_usa) | (international_reserves_usa > threshold_high_ir_usa)
international_reserves_usa_outliers_values = international_reserves_usa[international_reserves_usa_outliers]



# Calculate Q1 (first quartile) and Q3 (third quartile) for Consumer Price Index in USA
cpi_usa_Q1 = cpi_usa.quantile(0.25)
cpi_usa_Q3 = cpi_usa.quantile(0.75)

# Calculate Interquartile Range (IQR) for Consumer Price Index in USA
cpi_usa_IQR = cpi_usa_Q3 - cpi_usa_Q1

# Define the threshold for outliers for Consumer Price Index in USA
threshold_low_cpi_usa = cpi_usa_Q1 - (1.5 * cpi_usa_IQR)
threshold_high_cpi_usa = cpi_usa_Q3 + (1.5 * cpi_usa_IQR)

# Extract the outliers for Consumer Price Index in USA
cpi_usa_outliers = (cpi_usa < threshold_low_cpi_usa) | (cpi_usa > threshold_high_cpi_usa)
cpi_usa_outliers_values = cpi_usa[cpi_usa_outliers]



# Use another method to check for outliers for international reserves in Japan, CPI in USA, and international reserves in USA
# Use z-score method to check for outliers for international reserves in Japan
import numpy as np 
 
# Convert DataFrame to NumPy array for international reserves in Japan
ir_japan = international_reserves_japan.values
 
# Calculate mean and standard deviation for international reserves in Japan
mean_ir_japan = np.mean(ir_japan)
std_dev_ir_japan = np.std(ir_japan)

# Define threshold for z-score for international reserves in Japan
threshold = 2

# Calculate z-scores for each data point for international reserves in Japan
z_scores_ir_japan = np.abs((ir_japan - mean_ir_japan) / std_dev_ir_japan)
 
# Identify outliers for international reserves in Japan
outliers_ir_japan = ir_japan[z_scores_ir_japan > threshold]



# Use z-score method to check for outliers for CPI in USA
# Convert DataFrame to NumPy array for for CPI in USA
prices_usa = cpi_usa.values
 
# Calculate mean and standard deviation for for CPI in USA
mean_prices_usa = np.mean(prices_usa)
std_dev_prices_usa = np.std(prices_usa)

# Define threshold for z-score for for CPI in USA
threshold = 2

# Calculate z-scores for each data point for for CPI in USA
z_scores_prices_usa = np.abs((prices_usa - mean_prices_usa) / std_dev_prices_usa)
 
# Identify outliers for for CPI in USA
outliers_prices_usa = prices_usa[z_scores_prices_usa > threshold]

print(outliers_prices_usa)












AttributeError: 'numpy.float64' object has no attribute 'values'