In [1]:
import math
import os
from datetime import timedelta, datetime
from getpass import getpass
from urllib.parse import quote

import mysql.connector as mysql
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from sqlalchemy import create_engine

In [2]:
# Notebook display configuration
from IPython.core.interactiveshell import InteractiveShell

# Never truncate DataFrame output: display every row
pd.set_option("display.max_rows", None)

# Echo the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# The database password is never stored in the notebook. It is read from the
# MYSQL_PASSWORD environment variable; when that is unset (or empty) the user
# is prompted for it and the typed input is not echoed.
# NOTE(review): a plaintext password used to live in this cell, so it should
# be rotated on the server.
db_password = os.environ.get("MYSQL_PASSWORD") or getpass(
    "MySQL password for root@localhost: "
)

# DB-API connection used for the raw cursor queries and inserts below
db = mysql.connect(
        host="localhost",
        user="root",
        passwd=db_password,
        database="value_investing_dev"
        )

cursor = db.cursor()

# Table that the rank results are written to (also names the CSV that
# supplies its column definitions)
table_name = "ranking_data"

# NOTE(review): not referenced by any other cell visible in this notebook
current_company = "BRBY"

In [4]:
# The CSV header row supplies the column definitions of the target table
df = pd.read_csv(f"data/database_tables/{table_name}.csv")

# Bare column names: first space-separated token of every header,
# with the leading column dropped afterwards
col_names_list = [header.split(' ')[0] for header in df.columns]
del col_names_list[0]

# Complete header texts, comma-joined, form the body of the CREATE TABLE clause
sql_col_names = ', '.join(list(df.columns))

cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({sql_col_names})")

In [5]:
# The password comes from the MYSQL_PASSWORD environment variable, falling
# back to a non-echoing prompt, so no secret is stored in the notebook.
db_password = os.environ.get("MYSQL_PASSWORD") or getpass(
    "MySQL password for root@localhost: "
)

# Percent-encode the password: characters such as '+', '@', '/' or '^' are
# otherwise mis-parsed (or decoded differently) as part of the URL.
encoded_password = quote(db_password, safe="")
db_connection_str = (
    f"mysql://root:{encoded_password}@localhost/value_investing_dev"
)

# Connect to database (SQLAlchemy engine, used by pandas.read_sql)
db_connection = create_engine(db_connection_str)

# Read DCF variables: one row per calc variable with its parameter name
df_dcf_variables = pd.read_sql(
    "SELECT param_name, value FROM calc_variables LEFT JOIN parameters ON calc_variables.parameter_id = parameters.id",
    con=db_connection,
)

# Get Params: lookup table parameter name -> id
df_params = pd.read_sql(
    "SELECT id, param_name FROM parameters",
    con=db_connection,
)

# Get Companies: lookup table ticker (tidm) -> id
df_companies = pd.read_sql(
    "SELECT id, tidm, company_name FROM companies",
    con=db_connection,
)

In [6]:
# Pull every calculated value stored for one ticker, labelled with its parameter name.
# NOTE(review): the ticker is hard-coded as 'AV.' in the query, while
# `current_company` set in an earlier cell is not used here - confirm which is intended.
tidm_query = f"SELECT time_stamp, value, param_name FROM calculated_data LEFT JOIN companies ON calculated_data.company_id = companies.id LEFT JOIN parameters ON calculated_data.parameter_id = parameters.id WHERE tidm = 'AV.'"
cursor.execute(tidm_query)
data_tidm = cursor.fetchall()

# Long format: one row per (time_stamp, parameter) observation
df_tidm = pd.DataFrame(data_tidm, columns=['time_stamp', 'value', 'parameter'])

# Wide format: parameters down the rows, report dates across the columns
df_tidm_pivot = df_tidm.pivot(columns='time_stamp', index='parameter', values='value')

# Growth parameters to keep, in display order
growth_rows = [
    'Revenue Growth (10 year)',
    'Earnings Growth (10 year)',
    'Dividend Growth (10 year)',
    'Overall Growth (10 year)',
]

# Slicing from a label to itself keeps each selection a DataFrame
df_rev_rate, df_earn_rate, df_div_rate, df_overall_rate = (
    df_tidm_pivot[row_label:row_label] for row_label in growth_rows
)

df_calculated = pd.concat(
    [df_rev_rate, df_earn_rate, df_div_rate, df_overall_rate]
).astype('float')
df_calculated

time_stamp,1981-12-31,1982-12-31,1983-12-31,1984-12-31,1985-12-31,1986-12-31,1987-12-31,1988-12-31,1989-12-31,1990-12-31,...,2010-12-31,2011-12-31,2012-12-31,2013-12-31,2014-12-31,2015-12-31,2016-12-31,2017-12-31,2018-12-31,2019-12-31
parameter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Revenue Growth (10 year),,,,,,,,,,,...,,,,-19.924037,-10.578433,-3.004132,9.59875,11.984257,-2.688258,16.886815
Earnings Growth (10 year),,,,,,,,,,,...,139.60114,34.643735,-72.722222,-76.958719,-61.303191,-24.658519,-16.682028,-4.756243,-21.806569,171.690428
Dividend Growth (10 year),,,,,,,,,,0.0,...,-3.055229,4.137931,-8.203125,-27.448609,-42.303433,-43.854167,-30.888889,-13.333333,6.887417,-5.106383
Overall Growth (10 year),,,,,,,,,,0.0,...,68.272955,19.390833,-40.462674,-41.443789,-38.061686,-23.838939,-12.657389,-2.035106,-5.869137,61.156953


In [7]:
# Rank every company on each metric in ranktype_list, then add the per-metric
# ranks together into a single "Defensive Rank" (a lower sum sorts first).
ranktype_list = [
    'Growth Rate (10 year)',
    'Growth Quality',
    'Median ROCE (10 year)',
    'PE10',
    'DP10',
]

# Metrics ranked ascending among strictly positive values, with non-positive
# values placed after them; every other metric ranks highest value first.
LOWER_IS_BETTER = {'PE10', 'DP10'}

# Stand-in for a missing value so such companies still receive a (poor) rank
MISSING_VALUE = -999

# The parameter name is bound as a query parameter instead of being
# interpolated into the SQL text.
rank_query = (
    "SELECT time_stamp, value, tidm FROM calculated_data "
    "LEFT JOIN companies ON calculated_data.company_id = companies.id "
    "LEFT JOIN parameters ON calculated_data.parameter_id = parameters.id "
    "WHERE param_name = %s"
)

rank_df_list = []
values_df_list = []
num_ranks = len(ranktype_list)

for rank_num, rank_type in enumerate(ranktype_list, start=1):
    print(f"Rank {rank_num} of {num_ranks}, {rank_type}")

    # Get Report Data
    cursor.execute(rank_query, (rank_type,))
    data = cursor.fetchall()
    if not data:
        # Fail with a clear message instead of an opaque pandas error further down
        raise ValueError(f"No calculated_data rows found for parameter '{rank_type}'")

    report_df = pd.DataFrame(data, columns=['time_stamp', 'value', 'tidm'])
    report_df['time_stamp'] = pd.to_datetime(report_df['time_stamp'])

    # Offset dates by 1 day to account for companies
    # reporting on first of the year
    report_df['time_stamp_delta'] = report_df['time_stamp'] + pd.DateOffset(days=-1)
    report_df['year'] = report_df['time_stamp_delta'].dt.year

    # Rows sharing a (company, year); not used below, kept for interactive inspection
    duplicated_columns_df = report_df[
        report_df.duplicated(subset=['tidm', 'year'], keep=False)
    ]

    # A company may report twice in a year: keep its latest report.
    # The query has no ORDER BY, so sort by timestamp first; otherwise
    # keep='last' would pick whichever row the database returned last.
    latest_reports_df = (
        report_df
        .sort_values('time_stamp', kind='mergesort')
        .drop_duplicates(subset=['tidm', 'year'], keep='last')
    )

    # Companies down the rows, years across the columns.
    # 'None' strings become NaN via an explicit np.nan: on pandas < 1.4 an
    # explicit value=None is ignored and replace() forward-fills instead.
    df_pivot = (
        latest_reports_df
        .pivot(columns='year', index='tidm', values='value')
        .replace('None', np.nan)
        .astype('float')
    )

    # Take the most recent year's value, falling back to the year before
    # when the most recent one is missing
    latest_values = df_pivot.iloc[:, -1]
    if df_pivot.shape[1] > 1:
        latest_values = latest_values.fillna(df_pivot.iloc[:, -2])

    value_col = f"{rank_type} Rank Value"
    df_growth_values = latest_values.to_frame(name=value_col).fillna(MISSING_VALUE)

    if rank_type in LOWER_IS_BETTER:
        # Positive values ascending first, then zero/negative/missing descending
        mask = df_growth_values[value_col].gt(0)
        df_growth_values = pd.concat([
            df_growth_values[mask].sort_values(value_col),
            df_growth_values[~mask].sort_values(value_col, ascending=False),
        ])
    else:
        df_growth_values = df_growth_values.sort_values(by=value_col, ascending=False)

    # Rank is the zero-based position in the sorted order (ties get distinct ranks)
    df_growth_rank = pd.DataFrame(
        {f"{rank_type} Rank": range(len(df_growth_values))},
        index=df_growth_values.index,
    )

    values_df_list.append(df_growth_values)
    rank_df_list.append(df_growth_rank)

df_growth_values = pd.concat(values_df_list, axis=1)

# Growth Rank
# NOTE(review): a company absent from one metric gets NaN for that rank, which
# sum() skips, so it would look better than it should - confirm that every
# company has data for every metric.
df_growth_rank = pd.concat(rank_df_list, axis=1)
df_growth_rank['Defensive Rank'] = df_growth_rank.sum(axis=1)
df_growth_rank = df_growth_rank.sort_values(by="Defensive Rank", ascending=True)

# Combine back together
df_rank_both = pd.concat([df_growth_values, df_growth_rank], axis=1)
df_rank_both

Rank 0 of 5, Growth Rate (10 year)
Rank 1 of 5, Growth Quality
Rank 2 of 5, Median ROCE (10 year)
Rank 3 of 5, PE10
Rank 4 of 5, DP10


Unnamed: 0,Growth Rate (10 year) Rank Value,Growth Quality Rank Value,Median ROCE (10 year) Rank Value,PE10 Rank Value,DP10 Rank Value,Growth Rate (10 year) Rank,Growth Quality Rank,Median ROCE (10 year) Rank,PE10 Rank,DP10 Rank,Defensive Rank
GVC,52.262828,66.666667,2.444187,43.859127,38.678915,0,56,77,85,45,263
PSN,43.479873,70.0,16.395775,18.107908,34.886731,1,45,13,43,40,142
OCDO,40.704291,50.0,-0.992418,-327.2,0.0,2,90,99,99,99,389
BDEV,40.249654,80.0,8.827415,12.673141,39.29477,3,21,38,21,46,129
AHT,35.577301,93.333333,9.050433,31.640966,106.409002,4,2,36,75,88,205
TW.,31.481139,80.0,13.077221,15.385839,85.955556,5,19,19,35,83,161
IAG,29.901701,63.333333,7.287287,7.53793,35.72069,6,63,45,4,42,160
BKG,20.759521,63.333333,18.266857,14.675648,38.218365,7,59,9,32,44,151
JD.,19.286989,76.666667,18.583283,60.10101,631.060606,8,26,8,93,98,233
STJ,19.189191,66.666667,0.0,42.608855,45.756385,9,54,84,83,56,286


In [8]:
# Save to database: one row per (company, parameter) cell of df_rank_both.
# df_rank_both itself is left untouched so this cell can be re-run safely.

# Lookup tables label -> database id. When a label occurs more than once
# the first matching row is used.
param_ids = df_params.drop_duplicates(subset='param_name').set_index('param_name')['id']
company_ids = df_companies.drop_duplicates(subset='tidm').set_index('tidm')['id']

# Fail early, naming exactly what is missing, instead of with a bare IndexError
missing_params = [name for name in df_rank_both.columns if name not in param_ids.index]
missing_companies = [tidm for tidm in df_rank_both.index if tidm not in company_ids.index]
if missing_params or missing_companies:
    raise KeyError(
        f"No database id found for parameters {missing_params} "
        f"and/or companies {missing_companies}"
    )

# All rows written by one run share the same timestamp
time_stamp_now = datetime.now()

# Build SQL statement
col_names_list = ["company_id", "parameter_id", "time_stamp", "value"]
placeholders = ", ".join(["%s"] * len(col_names_list))
columns = ", ".join(col_names_list)
sql = f"INSERT INTO {table_name} ( {columns} ) VALUES ( {placeholders} )"

# Column by column, then company by company. Ids and values are passed as
# strings (a missing value is therefore stored as the text 'nan').
rows = [
    (
        str(company_ids.loc[tidm]),
        str(param_ids.loc[param_name]),
        time_stamp_now,
        str(value),
    )
    for param_name, column in df_rank_both.items()
    for tidm, value in column.items()
]

# Insert everything in one batch and commit once; on any failure roll back
# so that no partial result set is left in the table.
try:
    cursor.executemany(sql, rows)
    db.commit()
except Exception:
    db.rollback()
    raise

In [9]:
# Close connections: release the cursor first, then the SQLAlchemy engine's
# pool, then the DB-API connection the cursor belonged to.
cursor.close()
db_connection.dispose()
db.close()