In [1]:
#import libraries
import pandas as pd
import numpy as np
from textblob import TextBlob
import re
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [2]:
#load the earnings-call transcript data from the CSV file
#(per the original notes: S&P 100 companies, 2020 only)
transcribed_list = pd.read_csv('transcribed_transcript_list2.csv')

#print the first transcript in full; .iloc selects by position, so this
#does not depend on the row labels read from the file
print(transcribed_list['transcript'].iloc[0])

Apple Inc. (NASDAQ:AAPL) Q4 2020 Earnings Conference Call October 29, 2020  5:00 PM ET
Company Participants 
Tejas Gala - Senior Manager, Corporate Finance and Investor Relations
Tim Cook - Chief Executive Officer
Luca Maestri - Senior Vice President and Chief Financial Officer
Conference Call Participants
Shannon Cross - Cross Research
Jeriel Ong - Deutsche Bank
Katy Huberty - Morgan Stanley
Amit Daryanani - Evercore
Samik Chatterjee - JP Morgan
Krish Sankar - Cowen and Company
Kyle McNealy - Jefferies
Chris Caso - Raymond James
Operator
Good day everyone and welcome to the Apple Inc. Fourth Quarter Fiscal Year 2020 Earnings Conference Call. Today’s call is being recorded.
At this time for opening remarks and introductions, I would like to turn things over to Tejas Gala, Senior Analyst, Corporate Finance and Investor Relations. Please go ahead, sir.
Tejas Gala
Thank you. Good afternoon and thank you for joining us. Speaking first today is Apple’s CEO, Tim Cook, and he will be followed

In [3]:
#preview the first five rows, including the transcript_prep_remarks column
transcribed_list.iloc[:5]

Unnamed: 0.1,Unnamed: 0,date,ticker,title,transcript,transcript_prep_remarks,url
0,0,10/29/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q4 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q4 2020 Earnings Conf...,"['Operator', 'Good day everyone and welcome to...",/article/4382943-apple-inc-aapl-ceo-tim-cook-o...
1,1,7/30/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q3 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q3 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4362707-apple-inc-aapl-ceo-tim-cook-o...
2,2,4/30/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q2 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q2 2020 Results Confe...,"['Operator', 'Good day everyone. Welcome to th...",/article/4341792-apple-inc-aapl-ceo-tim-cook-o...
3,3,1/28/2020,AAPL,Apple Inc. (AAPL) CEO Tim Cook on Q1 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q1 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4319666-apple-inc-aapl-ceo-tim-cook-o...
4,8,10/30/2020,ABBV,AbbVie Inc.'s (ABBV) CEO Rick Gonzalez on Q3 2...,AbbVie Inc. (NYSE:ABBV) Q3 2020 Results Earnin...,"['Operator', 'Good morning and thank you for s...",/article/4383381-abbvie-inc-s-abbv-ceo-rick-go...


In [5]:
#inspect the dtype pandas assigned to each column
transcribed_list.dtypes

Unnamed: 0                  int64
date                       object
ticker                     object
title                      object
transcript                 object
transcript_prep_remarks    object
url                        object
dtype: object

In [6]:
#display the data


In [7]:
#clean the data



In [8]:
#helper: subjectivity score of a piece of text, computed with TextBlob
def getSubjectivity(text):
    """Return the TextBlob sentiment subjectivity of `text`."""
    blob_sentiment = TextBlob(text).sentiment
    return blob_sentiment.subjectivity

In [9]:
#helper: polarity score of a piece of text, computed with TextBlob
def getPolarity(text):
    """Return the TextBlob sentiment polarity of `text`."""
    blob_sentiment = TextBlob(text).sentiment
    return blob_sentiment.polarity


In [10]:
#add one TextBlob score column per scorer, each computed from the
#transcript_prep_remarks text of the row
prep_remarks = transcribed_list['transcript_prep_remarks']
for column_name, scorer in (('Subjectivity', getSubjectivity),
                            ('Polarity', getPolarity)):
    transcribed_list[column_name] = prep_remarks.apply(scorer)

In [11]:
#display the Subjectivity column computed by getSubjectivity
transcribed_list['Subjectivity']

0      0.492110
1      0.482856
2      0.448222
3      0.464910
4      0.436565
         ...   
396    0.000000
397    0.382868
398    0.395989
399    0.408350
400    0.426653
Name: Subjectivity, Length: 401, dtype: float64

In [12]:
#create function to get sentiment score using Sentiment Intensity Analyzer

#one shared analyzer, built lazily on the first getSIA call and reused afterwards
_sia_analyzer = None

def getSIA(text):
    """Return the VADER polarity scores for `text`.

    The result is whatever SentimentIntensityAnalyzer.polarity_scores returns;
    this notebook reads its 'neg', 'neu', 'pos' and 'compound' entries.
    """
    global _sia_analyzer
    if _sia_analyzer is None:
        # build the analyzer only once instead of once per transcript
        _sia_analyzer = SentimentIntensityAnalyzer()
    return _sia_analyzer.polarity_scores(text)

In [13]:
#score every prepared-remarks text with VADER; iterating over the values
#avoids label-based [i] lookups, which only work while the index is 0..n-1
sia_scores = [getSIA(remarks) for remarks in transcribed_list['transcript_prep_remarks']]

#split the per-transcript score dicts into one list per component
compound = [scores['compound'] for scores in sia_scores]
neg = [scores['neg'] for scores in sia_scores]
pos = [scores['pos'] for scores in sia_scores]
neu = [scores['neu'] for scores in sia_scores]

#SIA keeps the scores of the last transcript (0 when there are no rows),
#because a later cell displays it
SIA = sia_scores[-1] if sia_scores else 0

In [14]:
#peek at the first three VADER 'pos' scores
pos[:3]

[0.167, 0.171, 0.157]

In [15]:
#SIA holds the score dict returned by the last getSIA call of the scoring loop
SIA

{'neg': 0.029, 'neu': 0.817, 'pos': 0.154, 'compound': 1.0}

In [16]:
#attach the VADER score lists to the dataframe, one column per component
vader_columns = {
    'Compound': compound,
    'Negative': neg,
    'Positive': pos,
    'Neutral': neu,
}
for column_name, values in vader_columns.items():
    transcribed_list[column_name] = values

In [17]:
#the sentiment scores were already stored in the dataframe by the cell above;
#repeating the four assignments is redundant, so this cell only checks that
#the VADER columns are present before the frame is re-indexed
expected_vader_columns = {'Compound', 'Negative', 'Positive', 'Neutral'}
assert expected_vader_columns <= set(transcribed_list.columns), "missing VADER score columns"

In [18]:
#index the rows by (date, ticker); the guard keeps the cell re-runnable,
#because once 'date' and 'ticker' are in the index they are no longer columns
#and a second set_index call would raise KeyError
if list(transcribed_list.index.names) != ['date', 'ticker']:
    transcribed_list = transcribed_list.set_index(['date', 'ticker'])
transcribed_list

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,title,transcript,transcript_prep_remarks,url,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10/29/2020,AAPL,0,Apple Inc. (AAPL) CEO Tim Cook on Q4 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q4 2020 Earnings Conf...,"['Operator', 'Good day everyone and welcome to...",/article/4382943-apple-inc-aapl-ceo-tim-cook-o...,0.492110,0.168114,1.0000,0.036,0.167,0.797
7/30/2020,AAPL,1,Apple Inc. (AAPL) CEO Tim Cook on Q3 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q3 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4362707-apple-inc-aapl-ceo-tim-cook-o...,0.482856,0.164288,1.0000,0.028,0.171,0.801
4/30/2020,AAPL,2,Apple Inc. (AAPL) CEO Tim Cook on Q2 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q2 2020 Results Confe...,"['Operator', 'Good day everyone. Welcome to th...",/article/4341792-apple-inc-aapl-ceo-tim-cook-o...,0.448222,0.193248,1.0000,0.030,0.157,0.813
1/28/2020,AAPL,3,Apple Inc. (AAPL) CEO Tim Cook on Q1 2020 Resu...,Apple Inc. (NASDAQ:AAPL) Q1 2020 Results Confe...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4319666-apple-inc-aapl-ceo-tim-cook-o...,0.464910,0.192856,0.9999,0.024,0.165,0.811
10/30/2020,ABBV,8,AbbVie Inc.'s (ABBV) CEO Rick Gonzalez on Q3 2...,AbbVie Inc. (NYSE:ABBV) Q3 2020 Results Earnin...,"['Operator', 'Good morning and thank you for s...",/article/4383381-abbvie-inc-s-abbv-ceo-rick-go...,0.436565,0.143596,0.9999,0.017,0.133,0.850
...,...,...,...,...,...,...,...,...,...,...,...,...
5/19/2020,WMT,1031,"Walmart, Inc. (WMT) CEO Doug McMillon on Q1 20...","Walmart, Inc. (NYSE:WMT) Q1 2021 Results Conf...",[],/article/4348814-walmart-inc-wmt-ceo-doug-mcmi...,0.000000,0.000000,0.0000,0.000,0.000,1.000
10/30/2020,XOM,1035,Exxon Mobil Corporation (XOM) Q3 2020 Results ...,Exxon Mobil Corporation (NYSE:XOM) Q3 2020 Ear...,"['Operator', ""Good day everyone and welcome to...",/article/4383411-exxon-mobil-corporation-xom-q...,0.382868,0.092255,0.9998,0.044,0.114,0.842
7/31/2020,XOM,1037,Exxon Mobil Corp (XOM) Q2 2020 Results - Earni...,Exxon Mobil Corp (NYSE:XOM) Q2 2020 Earnings C...,"['Operator', ""Good day, everyone. Welcome to t...",/article/4363013-exxon-mobil-corp-xom-q2-2020-...,0.395989,0.093709,0.9998,0.041,0.099,0.860
5/1/2020,XOM,1039,Exxon Mobil Corporation (XOM) CEO Darren Woods...,Exxon Mobil Corporation (NYSE:XOM) Q1 2020 Ear...,"['Operator', ""Good day, everyone, and welcome ...",/article/4342148-exxon-mobil-corporation-xom-c...,0.408350,0.133240,0.9999,0.045,0.133,0.823


In [19]:
#keep only the numeric sentiment columns; drop the text/metadata columns.
#columns= is used because passing the axis positionally (drop(labels, 1))
#is rejected by pandas 2.0 and later
transcribed_keep = transcribed_list.drop(
    columns=['title', 'Unnamed: 0', 'transcript', 'transcript_prep_remarks', 'url']
)
transcribed_keep

Unnamed: 0_level_0,Unnamed: 1_level_0,Subjectivity,Polarity,Compound,Negative,Positive,Neutral
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10/29/2020,AAPL,0.492110,0.168114,1.0000,0.036,0.167,0.797
7/30/2020,AAPL,0.482856,0.164288,1.0000,0.028,0.171,0.801
4/30/2020,AAPL,0.448222,0.193248,1.0000,0.030,0.157,0.813
1/28/2020,AAPL,0.464910,0.192856,0.9999,0.024,0.165,0.811
10/30/2020,ABBV,0.436565,0.143596,0.9999,0.017,0.133,0.850
...,...,...,...,...,...,...,...
5/19/2020,WMT,0.000000,0.000000,0.0000,0.000,0.000,1.000
10/30/2020,XOM,0.382868,0.092255,0.9998,0.044,0.114,0.842
7/31/2020,XOM,0.395989,0.093709,0.9998,0.041,0.099,0.860
5/1/2020,XOM,0.408350,0.133240,0.9999,0.045,0.133,0.823


In [20]:
#obj_df["OHC_Code"] = np.where(obj_df["engine_type"].str.contains("ohc"), 1, 0)
#obj_df[["make", "engine_type", "OHC_Code"]].head()

In [21]:
#label column: 1 where the VADER 'Positive' score is above 0.10, otherwise 0
positive_above_threshold = transcribed_keep['Positive'] > 0.100
transcribed_keep['Label'] = np.where(positive_above_threshold, 1, 0)

In [22]:
#check how many labels were created (one per row)
transcribed_keep['Label'].shape

(401,)

In [23]:
#display the score columns together with the new Label column
transcribed_keep

Unnamed: 0_level_0,Unnamed: 1_level_0,Subjectivity,Polarity,Compound,Negative,Positive,Neutral,Label
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10/29/2020,AAPL,0.492110,0.168114,1.0000,0.036,0.167,0.797,1
7/30/2020,AAPL,0.482856,0.164288,1.0000,0.028,0.171,0.801,1
4/30/2020,AAPL,0.448222,0.193248,1.0000,0.030,0.157,0.813,1
1/28/2020,AAPL,0.464910,0.192856,0.9999,0.024,0.165,0.811,1
10/30/2020,ABBV,0.436565,0.143596,0.9999,0.017,0.133,0.850,1
...,...,...,...,...,...,...,...,...
5/19/2020,WMT,0.000000,0.000000,0.0000,0.000,0.000,1.000,0
10/30/2020,XOM,0.382868,0.092255,0.9998,0.044,0.114,0.842,1
7/31/2020,XOM,0.395989,0.093709,0.9998,0.041,0.099,0.860,0
5/1/2020,XOM,0.408350,0.133240,0.9999,0.045,0.133,0.823,1


In [24]:
#create featured data set: every column of transcribed_keep except the target.
#columns= is used because passing the axis positionally (drop(labels, 1))
#is rejected by pandas 2.0 and later
#NOTE(review): 'Label' is computed from 'Positive' (threshold 0.100) and
#'Positive' is still one of the features here, so the target leaks into X;
#confirm whether that is intended before trusting the model metrics
X = np.array(transcribed_keep.drop(columns=['Label']))

#create target data set
y = np.array(transcribed_keep['Label'])

In [25]:
#display the feature matrix (one row per transcript, one column per score)
X

array([[0.49211034, 0.16811379, 1.        , 0.036     , 0.167     ,
        0.797     ],
       [0.48285643, 0.16428827, 1.        , 0.028     , 0.171     ,
        0.801     ],
       [0.44822245, 0.19324771, 1.        , 0.03      , 0.157     ,
        0.813     ],
       ...,
       [0.39598902, 0.09370907, 0.9998    , 0.041     , 0.099     ,
        0.86      ],
       [0.40835039, 0.13324044, 0.9999    , 0.045     , 0.133     ,
        0.823     ],
       [0.42665329, 0.11859143, 1.        , 0.029     , 0.154     ,
        0.817     ]])

In [26]:
#display the target labels
y

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,

In [27]:
#hold out 20% of the rows for testing and train on the remaining 80%;
#random_state is fixed so the split is reproducible
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [28]:
#build a linear discriminant analysis classifier and fit it on the training split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda_classifier = LinearDiscriminantAnalysis()
model = lda_classifier.fit(x_train, y_train)

In [29]:
#get the model's predicted class labels for the held-out test features
predictions= model.predict(x_test)
predictions

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [30]:
#show the model's precision / recall / f1 on the test split
from sklearn.metrics import classification_report

report_text = classification_report(y_test, predictions)
print(report_text)

              precision    recall  f1-score   support

           0       1.00      0.33      0.50         9
           1       0.92      1.00      0.96        72

    accuracy                           0.93        81
   macro avg       0.96      0.67      0.73        81
weighted avg       0.93      0.93      0.91        81

