# Data Analysis

### Text Mining on Earnings Calls during a Pandemic as a Means to Predict End-Of-The-Month Stock Performances
####  Olin School of Business <br> Jose Luis Rodriguez  <br> jlr@wustl.edu <br> Fall 2021

In [150]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

## Hotels, Restaurants and Leasure Stocks Tickers

In [151]:
hrl20 = pd.read_csv('data/hrl_mrk20.csv')
hrl21 = pd.read_csv('data/hrl_mrk21.csv')

In [174]:
size = []
for n in range(hrl20.shape[0]):
    cps = hrl20.iloc[n]['corpus']
    cps = [i.strip() for i in cps.split('\n') if i.strip() != '']
    size.extend(cps)
size = len(size)
rows = len(hrl20['related'])
cols = 17
d1 = 'shape: ' + '(' + str(rows) + ',' + str(cols) + ')' + '\ncorpus_size: ' + str(size)

size = []
for n in range(hrl21.shape[0]):
    cps = hrl21.iloc[n]['corpus']
    cps = [i.strip() for i in cps.split('\n') if i.strip() != '']
    size.extend(cps)
size = len(size)
rows = len(hrl21['related'])
cols = 17
d2 = 'shape: ' + '(' + str(rows) + ',' + str(cols) + ')' + '\ncorpus_size: ' + str(size)

print("DATA STATS" + '\n')
print("----- 2020 -----")
print(d1 + '\n')
print("----- 2021 -----")
print(d2)

DATA STATS

----- 2020 -----
shape: (310,17)
corpus_size: 26821

----- 2021 -----
shape: (309,17)
corpus_size: 28898


In [154]:
hrl20['date_market'] = pd.to_datetime(hrl20['date_market'])
hrl20['month_market'] = hrl20['date_market'].apply(lambda i:str(i.month).zfill(2) + '-2020')
hrl21['date_market'] = pd.to_datetime(hrl21['date_market'])
hrl21['month_market'] = hrl21['date_market'].apply(lambda i:str(i.month).zfill(2) + '-2021')

In [155]:
month_df = pd.DataFrame({'count':hrl20.groupby(['month_market','direction'])['direction'].count()}).reset_index()
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=month_df, y='count', x='month_market', hue='direction')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.title('Accumulated Stock Direction by Month')
plt.xlabel('Month-Year')
plt.xlabel('Count')
plt.savefig('imgs/hrl20/hrl-sent-20.png')
plt.close()

In [156]:
month_df = pd.DataFrame({'count':hrl21.groupby(['month_market','direction'])['direction'].count()}).reset_index()
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=month_df, y='count', x='month_market', hue='direction')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.title('Accumulated Stock Direction by Month')
plt.xlabel('Month-Year')
plt.xlabel('Count')
plt.savefig('imgs/hrl21/hrl-sent-21.png')
plt.close()

In [160]:
for secid in hrl20['related'].unique():
    p = hrl20[hrl20['related'] == secid][['date_market','change','company']].reset_index(drop=True)
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.bar(p['date_market'], p['change'])
    ax.set_ylabel('Price Change')
    ax.set_title(p['company'][0])
    plt.savefig( 'imgs/hrl20/' + secid + '-hrl-20.png')
    plt.close()

In [161]:
for secid in hrl21['related'].unique():
    p = hrl21[hrl21['related'] == secid][['date_market','change','company']].reset_index(drop=True)
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.bar(p['date_market'], p['change'])
    ax.set_ylabel('Price Change')
    ax.set_title(p['company'][0])
    plt.savefig( 'imgs/hrl21/' + secid + '-hrl-21.png')
    plt.close()

## Industrials - Transportation

In [50]:
trs20 = pd.read_csv('data/trs_mrk20.csv')
trs21 = pd.read_csv('data/trs_mrk20.csv')

In [157]:
trs20['date_market'] = pd.to_datetime(trs20['date_market'])
trs20['month_market'] = trs20['date_market'].apply(lambda i:str(i.month).zfill(2) + '-2020')
trs21['date_market'] = pd.to_datetime(trs21['date_market'])
trs21['month_market'] = trs21['date_market'].apply(lambda i:str(i.month).zfill(2) + '-2021')

In [175]:
size = []
for n in range(trs20.shape[0]):
    cps = trs20.iloc[n]['corpus']
    cps = [i.strip() for i in cps.split('\n') if i.strip() != '']
    size.extend(cps)
size = len(size)
rows = len(trs20['related'])
cols = 17
d1 = 'shape: ' + '(' + str(rows) + ',' + str(cols) + ')' + '\ncorpus_size: ' + str(size)

size = []
for n in range(trs21.shape[0]):
    cps = trs21.iloc[n]['corpus']
    cps = [i.strip() for i in cps.split('\n') if i.strip() != '']
    size.extend(cps)
size = len(size)
rows = len(trs21['related'])
cols = 17
d2 = 'shape: ' + '(' + str(rows) + ',' + str(cols) + ')' + '\ncorpus_size: ' + str(size)

print("DATA STATS" + '\n')
print("----- 2020 -----")
print(d1 + '\n')
print("----- 2021 -----")
print(d2)

DATA STATS

----- 2020 -----
shape: (220,17)
corpus_size: 12307

----- 2021 -----
shape: (220,17)
corpus_size: 12307


In [158]:
month_df = pd.DataFrame({'count':trs20.groupby(['month_market','direction'])['direction'].count()}).reset_index()
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=month_df, y='count', x='month_market', hue='direction')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.title('Accumulated Stock Direction by Month')
plt.xlabel('Month-Year')
plt.xlabel('Count')
plt.savefig('imgs/trs20/trs20-sent-20.png')
plt.close()

In [159]:
month_df = pd.DataFrame({'count':trs21.groupby(['month_market','direction'])['direction'].count()}).reset_index()
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=month_df, y='count', x='month_market', hue='direction')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.title('Accumulated Stock Direction by Month')
plt.xlabel('Month-Year')
plt.xlabel('Count')
plt.savefig('imgs/trs21/trs21-sent-21.png')
plt.close()

In [51]:
for secid in trs20['related'].unique():
    p = trs20[trs20['related'] == secid][['date_market','change','company']].reset_index(drop=True)
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.bar(p['date_market'], p['change'])
    ax.set_ylabel('Price Change')
    ax.set_title(p['company'][0])
    plt.savefig( 'imgs/trs20/' + secid + '-trs-20.png')
    plt.close()

In [52]:
for secid in trs21['related'].unique():
    p = trs21[trs21['related'] == secid][['date_market','change','company']].reset_index(drop=True)
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.bar(p['date_market'], p['change'])
    ax.set_ylabel('Price Change')
    ax.set_title(p['company'][0])
    plt.savefig( 'imgs/trs21/' + secid + '-trs-21.png')
    plt.close()