In [None]:
!pip install pandas
!pip install fuzzywuzzy

In [3]:
# obtain and group historical price data
import pandas as pd
import os

root = "./datasets/historical_prices/"

# Collect the snapshot files to load.
# - Hidden entries and non-files are skipped (e.g. the `.ipynb_checkpoints`
#   directory Jupyter may create), since pd.read_csv cannot open them.
# - os.listdir() returns names in arbitrary order; sorting makes the order of
#   the resulting `price_<file>` columns reproducible.
#   NOTE(review): assumes file names sort in the intended (chronological?)
#   order -- confirm against the dataset.
price_files = sorted(
    name for name in os.listdir(root)
    if not name.startswith('.') and os.path.isfile(os.path.join(root, name))
)

# Wide table: one `company_name` key plus one `price_<file>` column per file.
historical_price = pd.DataFrame(columns=['company_name'])

for file in price_files:
    df = pd.read_csv(os.path.join(root, file))
    # Lower-case the key so the same company spelled with different casing
    # lands on the same row.
    df['company_name'] = df['company_name'].str.lower()

    # Collapse duplicate names inside this file before merging. The per-company
    # mean is the same as averaging after the merge, but it avoids the row
    # multiplication an outer merge produces when both sides repeat a key.
    df = df.groupby('company_name', as_index=False).mean(numeric_only=True)

    historical_price = pd.merge(historical_price, df, how="outer", on=["company_name"])

    # Tag this file's price column with the file name so the next file's
    # `price` column does not collide with it.
    historical_price = historical_price.rename(columns={'price': 'price_' + file})


# One row per company, indexed by company_name; numeric_only keeps any
# non-numeric column from raising on recent pandas versions.
grouped_price = historical_price.groupby(['company_name']).mean(numeric_only=True)
grouped_price.to_csv('./datasets/historical_prices.csv')

In [None]:
# produce risk score based on price data
import pandas as pd
from fuzzywuzzy.fuzz import partial_ratio

grouped_price = pd.read_csv('./datasets/historical_prices.csv')

# Minimum partial_ratio score at which two names are treated as the same company.
MATCH_THRESHOLD = 90
# Numeric columns of the risk table (everything except the company name).
SCORE_COLUMNS = ['num_matches', 'num_jumps', 'max_change']


def _summarize_price_jumps(prices):
    """Count price changes in a sequence and find the largest relative one.

    Args:
        prices: list of prices in column order, missing values already removed.

    Returns:
        Tuple ``(num_jumps, max_change)``: the number of consecutive pairs whose
        values differ, and the largest ``abs((cur - prev) / prev)`` among those
        pairs as a fraction. Both are 0 when there are fewer than two prices
        or no price ever changes.
    """
    num_jumps = 0
    max_change = 0
    for prev_price, price in zip(prices, prices[1:]):
        if price == prev_price:
            continue
        num_jumps += 1
        # A relative change from a zero price is undefined; dividing would
        # produce inf and turn the normalised column into NaN. The jump is
        # still counted, it just does not contribute to max_change.
        if prev_price != 0:
            max_change = max(max_change, abs((price - prev_price) / prev_price))
    return num_jumps, max_change


company_names = grouped_price['company_name']

# Collect one dict per company and build the frame once at the end
# (DataFrame.append was removed in pandas 2.0 and was quadratic in a loop).
records = []
for company in company_names:
    print(company)

    # Row labels of every company whose name fuzzily matches this one
    # (the company always matches itself).
    fuzzy_matches = [ind for ind, other in company_names.items()
                     if partial_ratio(company, other) >= MATCH_THRESHOLD]
    matched_rows = grouped_price.loc[fuzzy_matches]
    # Average each price column over the matched rows; numeric_only leaves the
    # string company_name column out, and columns with no price are dropped.
    mean_prices = matched_rows.mean(axis=0, numeric_only=True).dropna()

    num_jumps, max_change = _summarize_price_jumps(mean_prices.tolist())

    records.append({
        'company_name': company,
        'num_matches': len(fuzzy_matches),
        'num_jumps': num_jumps,
        'max_change': max_change*100  # stored as a percentage
    })

risk_score = pd.DataFrame(records, columns=['company_name'] + SCORE_COLUMNS)

# Standardise (z-score) the numeric columns only, then re-attach the names.
scores = risk_score[SCORE_COLUMNS].astype(float)
normalized_risk_score = (scores - scores.mean()) / scores.std()
normalized_risk_score.insert(0, 'company_name', risk_score['company_name'])

risk_score.to_csv('./datasets/risk_score.csv')
normalized_risk_score.to_csv('./datasets/risk_score_normed.csv')