# Train Graph NN on Call Mentions

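Using direct mentions of other companies in calls, construct a network of calls: each node is a company's call in a given quarter, and an edge links two calls when one of the companies mentions the other in that quarter.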

In [10]:
# Packages
import pandas as pd
import os

## Load Mentions Data

In [11]:
# Read the ticker-matched company-mention table from its Excel workbook
mentions_path = '../../../Data/Company_Mentions/Company_Mentions_With_Ticker.xlsx'
company_mentions_with_ticker = pd.read_excel(mentions_path)
company_mentions_with_ticker

Unnamed: 0,ticker,fixed_quarter_date,company_mentioned,count,matched_ticker,Corporation_clean
0,NEE,2012-01-01,PTC,1,PTC,PTC
1,NEE,2012-04-01,MOODY'S,1,MCO,MOODY'S
2,NEE,2012-04-01,PTC,1,PTC,PTC
3,NEE,2012-07-01,ALLIANT,2,LNT,ALLIANT
4,NEE,2012-10-01,PTC,2,PTC,PTC
...,...,...,...,...,...,...
3300,KTOS,2014-10-01,NORTHROP GRUMMAN,1,NOC,NORTHROP GRUMMAN
3301,KTOS,2015-01-01,NORTHROP GRUMMAN,2,NOC,NORTHROP GRUMMAN
3302,KTOS,2015-07-01,GOOGLE,1,GOOG,GOOGLE
3303,KTOS,2015-10-01,GOOGLE,1,GOOG,GOOGLE


## Get pairwise tickers and calls

In [12]:
# Build the undirected edge list of the call network: one row per unordered
# ticker pair per quarter, with each endpoint labelled "<ticker> : <quarter>".
mention_pairs = (
    company_mentions_with_ticker[['ticker', 'matched_ticker', 'fixed_quarter_date']]
    .rename(columns={'ticker': 'ticker1', 'matched_ticker': 'ticker2'})
    # A row missing either ticker or the quarter cannot form an edge
    .dropna()
)

# Order doesn't matter: put the alphabetically smaller ticker in ticker1 so
# that (A, B) and (B, A) collapse to the same pair.
needs_swap = mention_pairs['ticker1'] > mention_pairs['ticker2']
mention_pairs = mention_pairs.assign(
    ticker1=mention_pairs['ticker1'].where(~needs_swap, mention_pairs['ticker2']),
    ticker2=mention_pairs['ticker2'].where(~needs_swap, mention_pairs['ticker1']),
)

# Only the existence of a pair within a quarter is kept (mention counts are
# not part of the result), so de-duplicate and sort for a stable row order.
mention_pairs = (
    mention_pairs
    .drop_duplicates()
    .sort_values(['ticker1', 'ticker2', 'fixed_quarter_date'])
    .reset_index(drop=True)
)

# Node labels: concatenate each ticker with the quarter of the call
quarter_label = mention_pairs['fixed_quarter_date'].astype(str)
pairwise_df = pd.DataFrame({
    'ticker1_fixed_quarter_date': mention_pairs['ticker1'] + ' : ' + quarter_label,
    'ticker2_fixed_quarter_date': mention_pairs['ticker2'] + ' : ' + quarter_label,
})

pairwise_df

Unnamed: 0,ticker1_fixed_quarter_date,ticker2_fixed_quarter_date
0,AAPL : 2015-01-01,ACIW : 2015-01-01
1,AAPL : 2012-04-01,ADP : 2012-04-01
2,AAPL : 2014-07-01,ADSK : 2014-07-01
3,AAPL : 2016-01-01,ADSK : 2016-01-01
4,AAPL : 2015-01-01,ALGT : 2015-01-01
...,...,...
2681,TMUS : 2012-04-01,VMI : 2012-04-01
2682,TMUS : 2012-10-01,VMI : 2012-10-01
2683,TOL : 2014-01-01,VGR : 2014-01-01
2684,WEC : 2013-01-01,XEL : 2013-01-01
