# Combine Twitter Posts

## Step 1 - Format the Play by Play Tweet Data

In [9]:
import pandas as pd

df = pd.read_csv('all_pbp_tweets.tar.gz')

# Drop the rows that are just repeats of the header
df = df.drop(df.loc[df['created_at'] == 'created_at'].index)

# Drop columns that don't contain meaninful data
df = df.drop(['location','time_zone'], axis=1)

# Convert created_at to datetime
df['UTC_Datetime'] = pd.to_datetime(df['created_at'])

# Rename column to tweet id and make it the index
df = df.rename(columns={'cardinals_pbp_tweets.csv':'tweet_id'}).set_index('tweet_id')

# make the tweet_id numeric
df.index = pd.to_numeric(df.index)

# Drop a single annoying NA row
df = df.loc[df.index.dropna()] # Drop this annoying

# Make year and time columns
df['time'] = df['UTC_Datetime'].apply(lambda x: x.time())
df['date'] = df['UTC_Datetime'].apply(lambda x: x.date())

# Sort by the timestamp
df = df.sort_values('UTC_Datetime')

# Reorder the columns in the dataframe
df = df[['UTC_Datetime','date','time','screen_name','source','text']]

# Only Games from 2017 Season and name pbptweets
pbptweets = df.loc[df['UTC_Datetime'] > '04-Sep-2017'].copy()

In [2]:
pbptweets.head()

Unnamed: 0_level_0,UTC_Datetime,date,time,screen_name,source,text
tweet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9.059546e+17,2017-09-08 00:43:00,2017-09-08,00:43:00,patriots_pbp,Patriots Play By Play,C.Santos kicks 64 yards from KC 35 to NE 1. D....
9.059546e+17,2017-09-08 00:43:00,2017-09-08,00:43:00,chiefs_pbp,Chiefs Play By Play,C.Santos kicks 64 yards from KC 35 to NE 1. D....
9.059548e+17,2017-09-08 00:44:00,2017-09-08,00:44:00,patriots_pbp,Patriots Play By Play,1/2 (14:55) NE 12-Brady 18th season as Patriot...
9.059548e+17,2017-09-08 00:44:00,2017-09-08,00:44:00,patriots_pbp,Patriots Play By Play,2/2 T.Brady pass incomplete deep left to D.All...
9.059548e+17,2017-09-08 00:44:01,2017-09-08,00:44:01,chiefs_pbp,Chiefs Play By Play,1/2 (14:55) NE 12-Brady 18th season as Patriot...


## Step 2 Load the Win Percentage Data and Try to Match Up

** One thing to keep in mind - The Twitter timestamps are UTC. The game Dates are the date the game STARTED. If a game went past midnight - or changed dates in UTC times we might have a problem **

In [2]:
winpct = pd.read_csv('All_Games_Win_Pct.csv')

# Call the play text the same thing as the other dataframe
winpct['text'] = winpct['playtext']

# Convert date to datetime
winpct['date'] = pd.to_datetime(winpct['Game Date'])

In [4]:
merged = winpct.merge(pbptweets, how='left', on=['text'])

In [5]:
winpct['text'][7]

'C.Boswell kicks 65 yards from PIT 35 to end zone, Touchback.'

In [6]:
print(len(pbptweets['text']))
print(len(pbptweets['text'].drop_duplicates()))

79409
42722


In [7]:
# Keep only unique play text, keep the first timestamp
pbptweets = pbptweets.drop_duplicates(subset='text', keep='first')

In [8]:
import re
# Find all the tweets mentioning "SANTOS"
pbptweets.loc[pbptweets['text'].apply(lambda x: any(re.findall('Santos',x)))][['date','screen_name','text']]

Unnamed: 0_level_0,date,screen_name,text
tweet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9.059546e+17,2017-09-08,patriots_pbp,C.Santos kicks 64 yards from KC 35 to NE 1. D....
9.059629e+17,2017-09-08,patriots_pbp,C.Santos extra point is GOOD Center-J.Winchest...
9.059637e+17,2017-09-08,patriots_pbp,C.Santos kicks 65 yards from KC 35 to end zone...
9.05978e+17,2017-09-08,chiefs_pbp,C.Santos kicks 57 yards from KC 35 to NE 8. D....
9.094799e+17,2017-09-17,chiefs_pbp,(11:26) C.Santos 34 yard field goal is GOOD Ce...
9.094799e+17,2017-09-17,eagles_pbp,C.Santos kicks 66 yards from KC 35 to PHI -1. ...
9.094851e+17,2017-09-17,eagles_pbp,(:21) C.Santos 39 yard field goal is GOOD Cent...
9.094852e+17,2017-09-17,eagles_pbp,C.Santos kicks 65 yards from KC 35 to end zone...
9.094935e+17,2017-09-17,eagles_pbp,C.Santos extra point is GOOD Center-J.Winchest...
9.09495e+17,2017-09-17,chiefs_pbp,C.Santos kicks 74 yards from KC 20 to PHI 6. W...


In [9]:
print(len(pbptweets))

42722


In [10]:
# Find all the plays mentioning Santos
winpct.loc[winpct['text'].apply(lambda x: any(re.findall('Santos',x)))][['playId','homeWinPercentage','playtext','date']]

Unnamed: 0,playId,homeWinPercentage,playtext,date
1052,40095156644,0.754,C.Santos kicks 64 yards from KC 35 to NE 1. D....,2017-09-07
1085,400951566938,0.707,Demetrius Harris Pass From Alex Smith for 7 Yr...,2017-09-07
1086,400951566974,0.711,C.Santos kicks 65 yards from KC 35 to end zone...,2017-09-07
1142,4009515662317,0.724,Kareem Hunt Pass From Alex Smith for 3 Yrds C....,2017-09-07
1143,4009515662353,0.72,C.Santos kicks 57 yards from KC 35 to NE 8. D....,2017-09-07
1159,4009515662756,0.461,Tyreek Hill Pass From Alex Smith for 75 Yrds C...,2017-09-07
1160,4009515662792,0.468,C.Santos kicks 65 yards from KC 35 to end zone...,2017-09-07
1196,4009515663725,0.523,Kareem Hunt Pass From Alex Smith for 78 Yrds C...,2017-09-07
1197,4009515663761,0.531,C.Santos kicks 65 yards from KC 35 to end zone...,2017-09-07
1221,4009515664314,0.101,Kareem Hunt 4 Yard Rush C.Santos extra point i...,2017-09-07


In [11]:
# There are a lot more plays mentioning Stants than there are tweets? What's the deal?

In [12]:
santos_tweets = pbptweets.loc[pbptweets['text'].apply(lambda x: any(re.findall('Santos',x)))][['date','screen_name','text']]
santos_winpct = winpct.loc[winpct['text'].apply(lambda x: any(re.findall('Santos',x)))][['playId','homeWinPercentage','playtext','date']]

In [13]:
santos_tweets['date'] = pd.to_datetime(santos_tweets['date'])

In [14]:
santos_tweets.loc[santos_tweets['date'] == '2017-09-08']['text'].unique() # only 2017-09-08 Tweets

array(['C.Santos kicks 64 yards from KC 35 to NE 1. D.Lewis to NE 27 for 26 yards (K.Pierre-Louis D.Harris). #NEvsKC',
       'C.Santos extra point is GOOD Center-J.Winchester Holder-D.Colquitt. #NEvsKC',
       'C.Santos kicks 65 yards from KC 35 to end zone Touchback. #NEvsKC',
       'C.Santos kicks 57 yards from KC 35 to NE 8. D.Lewis to NE 18 for 10 yards (D.Sorensen). #NEvsKC'],
      dtype=object)

In [15]:
santos_winpct.loc[santos_winpct['date'] == '2017-09-07']['playtext'].unique()

array(['C.Santos kicks 64 yards from KC 35 to NE 1. D.Lewis to NE 27 for 26 yards (K.Pierre-Louis, D.Harris).',
       'Demetrius Harris Pass From Alex Smith for 7 Yrds C.Santos extra point is GOOD',
       'C.Santos kicks 65 yards from KC 35 to end zone, Touchback.',
       'Kareem Hunt Pass From Alex Smith for 3 Yrds C.Santos extra point is GOOD',
       'C.Santos kicks 57 yards from KC 35 to NE 8. D.Lewis to NE 18 for 10 yards (D.Sorensen).',
       'Tyreek Hill Pass From Alex Smith for 75 Yrds C.Santos extra point is GOOD',
       'Kareem Hunt Pass From Alex Smith for 78 Yrds C.Santos extra point is GOOD',
       'Kareem Hunt 4 Yard Rush C.Santos extra point is GOOD',
       'Charcandrick West 21 Yard Rush C.Santos extra point is GOOD',
       'C.Santos kicks 64 yards from KC 35 to NE 1. D.Lewis to NE 14 for 13 yards (U.Eligwe).'],
      dtype=object)

# Pull from one game to compare

In [16]:
pats_chiefs_nov8_tweets = pbptweets.loc[(pbptweets['screen_name'] == 'patriots_pbp') | 
                                        (pbptweets['screen_name'] == 'chiefs_pbp')]

In [17]:
pats_chiefs_nov8_tweets = pats_chiefs_nov8_tweets.loc[pats_chiefs_nov8_tweets['UTC_Datetime'] <= '09-10-2017']

In [18]:
pats_chiefs_nov8_plays = winpct.loc[winpct['Game Title'] == 'Chiefs vs. Patriots ']

In [19]:
# These are pretty close....
print(len(pats_chiefs_nov8_tweets), "Unique Tweets on Gameday")
print(len(pats_chiefs_nov8_plays), "Unique Plays on Gameday")

198 Unique Tweets on Gameday
193 Unique Plays on Gameday


In [20]:
pats_chiefs_nov8_tweets = pats_chiefs_nov8_tweets.reset_index().drop('tweet_id', axis=1)

In [21]:
pats_chiefs_nov8_plays = pats_chiefs_nov8_plays.reset_index()[['homeWinPercentage',
                                      'secondsLeft',
                                      'homeScore',
                                      'awayScore',
                                      'clockdisplayValue',
                                      'text']]

In [22]:
pats_chiefs_nov8_plays.head()

Unnamed: 0,homeWinPercentage,secondsLeft,homeScore,awayScore,clockdisplayValue,text
0,0.754,0,0,0,14:55,C.Santos kicks 64 yards from KC 35 to NE 1. D....
1,0.73,0,0,0,14:55,(14:55) T.Brady pass incomplete deep left to D...
2,0.748,0,0,0,14:49,(14:49) T.Brady pass short right to R.Burkhead...
3,0.769,0,0,0,14:14,(14:14) (Shotgun) J.White left guard to NE 43 ...
4,0.754,0,0,0,13:52,"(13:52) (No Huddle, Shotgun) J.White up the mi..."


In [23]:
joined = pats_chiefs_nov8_plays.join(pats_chiefs_nov8_tweets, how='outer', rsuffix='tweets')

In [24]:
joined[['text',
        'texttweets',
        'UTC_Datetime',
        'homeWinPercentage',
        'homeScore',
        'awayScore',
        'clockdisplayValue']].to_csv('Pats_Chiefs_TwittervsPlays.csv')

# Create Columns that will help joining

In [10]:
winpct[['Game Title','date']].head()

Unnamed: 0,Game Title,date
0,Titans vs. Steelers,2017-11-16
1,Titans vs. Steelers,2017-11-16
2,Titans vs. Steelers,2017-11-16
3,Titans vs. Steelers,2017-11-16
4,Titans vs. Steelers,2017-11-16


In [11]:
pbptweets['count'] = 1

In [12]:
pbptweets.head()

Unnamed: 0_level_0,UTC_Datetime,date,time,screen_name,source,text,count
tweet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9.059546e+17,2017-09-08 00:43:00,2017-09-08,00:43:00,patriots_pbp,Patriots Play By Play,C.Santos kicks 64 yards from KC 35 to NE 1. D....,1
9.059546e+17,2017-09-08 00:43:00,2017-09-08,00:43:00,chiefs_pbp,Chiefs Play By Play,C.Santos kicks 64 yards from KC 35 to NE 1. D....,1
9.059548e+17,2017-09-08 00:44:00,2017-09-08,00:44:00,patriots_pbp,Patriots Play By Play,1/2 (14:55) NE 12-Brady 18th season as Patriot...,1
9.059548e+17,2017-09-08 00:44:00,2017-09-08,00:44:00,patriots_pbp,Patriots Play By Play,2/2 T.Brady pass incomplete deep left to D.All...,1
9.059548e+17,2017-09-08 00:44:01,2017-09-08,00:44:01,chiefs_pbp,Chiefs Play By Play,1/2 (14:55) NE 12-Brady 18th season as Patriot...,1


In [70]:
from datetime import timedelta

pbptweets['date'] = pbptweets['UTC_Datetime'].apply(lambda x: (x - timedelta(hours=12)).date())

pbptweets['Team1'] = pbptweets['source'].apply(lambda x: x.split(' ')[0])
pbptweets['Team2'] = pbptweets['source'].apply(lambda x: x.split(' ')[0])



winpct = pd.read_csv('All_Games_Win_Pct.csv')

# Call the play text the same thing as the other dataframe
winpct['text'] = winpct['playtext']

# Convert date to datetime
winpct['date'] = pd.to_datetime(winpct['Game Date'])

winpct['Team1'] = winpct['Game Title'].apply(lambda x: x.split(' ')[0])
winpct['Team2'] = winpct['Game Title'].apply(lambda x: x.split(' ')[2])

winpct['Game Title Date']  = winpct.apply(lambda x: x['Game Title'] + ' ' + x['date'].strftime('%Y-%m-%d'), axis =1)

# Make both dates a pandas datetime
winpct['date'] = pd.to_datetime(winpct['date'])
pbptweets['date'] = pd.to_datetime(pbptweets['date'])

gamelist = winpct.drop_duplicates(subset='Game Title Date')

tweetmerge = pbptweets.merge(gamelist[['Game Title Date','date','Team1']], how='inner', on=['date','Team1'])
tweetmerge2 = pbptweets.merge(gamelist[['Game Title Date','date','Team2']], how='inner', on=['date','Team2'])

tweets_gametitle = pd.concat([tweetmerge, tweetmerge2])

In [87]:
winpct[['playId','text','Game Title Date']].head()

Unnamed: 0,playId,text,Game Title Date
0,40095169837,R.Succop kicks 65 yards from TEN 35 to end zon...,Titans vs. Steelers 2017-11-16
1,40095169852,(15:00) (Shotgun) B.Roethlisberger pass short ...,Titans vs. Steelers 2017-11-16
2,40095169876,"(14:23) (No Huddle, Shotgun) B.Roethlisberger ...",Titans vs. Steelers 2017-11-16
3,400951698100,"(13:46) (No Huddle, Shotgun) B.Roethlisberger ...",Titans vs. Steelers 2017-11-16
4,400951698124,"(13:15) (No Huddle, Shotgun) L.Bell up the mid...",Titans vs. Steelers 2017-11-16


In [88]:
tweets_gametitle

Unnamed: 0,UTC_Datetime,date,time,screen_name,source,text,count,date_fromutc,Team1,Team2,Game Title Date
0,2017-09-08 00:43:00,2017-09-07,00:43:00,chiefs_pbp,Chiefs Play By Play,C.Santos kicks 64 yards from KC 35 to NE 1. D....,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
1,2017-09-08 00:44:01,2017-09-07,00:44:01,chiefs_pbp,Chiefs Play By Play,1/2 (14:55) NE 12-Brady 18th season as Patriot...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
2,2017-09-08 00:44:01,2017-09-07,00:44:01,chiefs_pbp,Chiefs Play By Play,2/2 T.Brady pass incomplete deep left to D.All...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
3,2017-09-08 00:44:02,2017-09-07,00:44:02,chiefs_pbp,Chiefs Play By Play,(14:49) T.Brady pass short right to R.Burkhead...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
4,2017-09-08 00:45:00,2017-09-07,00:45:00,chiefs_pbp,Chiefs Play By Play,(14:14) (Shotgun) J.White left guard to NE 43 ...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
5,2017-09-08 00:45:01,2017-09-07,00:45:01,chiefs_pbp,Chiefs Play By Play,(13:52) (No Huddle Shotgun) J.White up the mid...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
6,2017-09-08 00:46:00,2017-09-07,00:46:00,chiefs_pbp,Chiefs Play By Play,(13:26) (No Huddle Shotgun) T.Brady pass deep ...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
7,2017-09-08 00:46:01,2017-09-07,00:46:01,chiefs_pbp,Chiefs Play By Play,(13:02) (No Huddle) J.White left tackle to KC ...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
8,2017-09-08 00:47:00,2017-09-07,00:47:00,chiefs_pbp,Chiefs Play By Play,(12:35) (No Huddle Shotgun) T.Brady pass short...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07
9,2017-09-08 00:47:01,2017-09-07,00:47:01,chiefs_pbp,Chiefs Play By Play,2/2 yards enforced at KC 14 - No Play. #NEvsKC,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07


In [89]:
winpct.head()

Unnamed: 0,playId,homeWinPercentage,secondsLeft,tiePercentage,awayScore,clockdisplayValue,homeScore,periodnumber,startdistance,startdown,...,playtext,playtypeabbreviation,playtypeid,Game Title,Game Date,text,date,Team1,Team2,Game Title Date
0,40095169837,0.773,0,0,0,15:00,0,1,0,0,...,R.Succop kicks 65 yards from TEN 35 to end zon...,K,53.0,Titans vs. Steelers,"November 16, 2017",R.Succop kicks 65 yards from TEN 35 to end zon...,2017-11-16,Titans,Steelers,Titans vs. Steelers 2017-11-16
1,40095169852,0.785,0,0,0,15:00,0,1,10,1,...,(15:00) (Shotgun) B.Roethlisberger pass short ...,REC,24.0,Titans vs. Steelers,"November 16, 2017",(15:00) (Shotgun) B.Roethlisberger pass short ...,2017-11-16,Titans,Steelers,Titans vs. Steelers 2017-11-16
2,40095169876,0.794,0,0,0,14:23,0,1,10,1,...,"(14:23) (No Huddle, Shotgun) B.Roethlisberger ...",REC,24.0,Titans vs. Steelers,"November 16, 2017","(14:23) (No Huddle, Shotgun) B.Roethlisberger ...",2017-11-16,Titans,Steelers,Titans vs. Steelers 2017-11-16
3,400951698100,0.795,0,0,0,13:46,0,1,10,1,...,"(13:46) (No Huddle, Shotgun) B.Roethlisberger ...",REC,24.0,Titans vs. Steelers,"November 16, 2017","(13:46) (No Huddle, Shotgun) B.Roethlisberger ...",2017-11-16,Titans,Steelers,Titans vs. Steelers 2017-11-16
4,400951698124,0.801,0,0,0,13:15,0,1,3,2,...,"(13:15) (No Huddle, Shotgun) L.Bell up the mid...",RUSH,5.0,Titans vs. Steelers,"November 16, 2017","(13:15) (No Huddle, Shotgun) L.Bell up the mid...",2017-11-16,Titans,Steelers,Titans vs. Steelers 2017-11-16


In [90]:
tweets_gametitle.merge(winpct[['playId','text','Game Title Date']],
                       how='left',
                       left_on=['Game Title Date','text'],
                       right_on=['Game Title Date','text'])

Unnamed: 0,UTC_Datetime,date,time,screen_name,source,text,count,date_fromutc,Team1,Team2,Game Title Date,playId
0,2017-09-08 00:43:00,2017-09-07,00:43:00,chiefs_pbp,Chiefs Play By Play,C.Santos kicks 64 yards from KC 35 to NE 1. D....,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
1,2017-09-08 00:44:01,2017-09-07,00:44:01,chiefs_pbp,Chiefs Play By Play,1/2 (14:55) NE 12-Brady 18th season as Patriot...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
2,2017-09-08 00:44:01,2017-09-07,00:44:01,chiefs_pbp,Chiefs Play By Play,2/2 T.Brady pass incomplete deep left to D.All...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
3,2017-09-08 00:44:02,2017-09-07,00:44:02,chiefs_pbp,Chiefs Play By Play,(14:49) T.Brady pass short right to R.Burkhead...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
4,2017-09-08 00:45:00,2017-09-07,00:45:00,chiefs_pbp,Chiefs Play By Play,(14:14) (Shotgun) J.White left guard to NE 43 ...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
5,2017-09-08 00:45:01,2017-09-07,00:45:01,chiefs_pbp,Chiefs Play By Play,(13:52) (No Huddle Shotgun) J.White up the mid...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
6,2017-09-08 00:46:00,2017-09-07,00:46:00,chiefs_pbp,Chiefs Play By Play,(13:26) (No Huddle Shotgun) T.Brady pass deep ...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
7,2017-09-08 00:46:01,2017-09-07,00:46:01,chiefs_pbp,Chiefs Play By Play,(13:02) (No Huddle) J.White left tackle to KC ...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
8,2017-09-08 00:47:00,2017-09-07,00:47:00,chiefs_pbp,Chiefs Play By Play,(12:35) (No Huddle Shotgun) T.Brady pass short...,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,
9,2017-09-08 00:47:01,2017-09-07,00:47:01,chiefs_pbp,Chiefs Play By Play,2/2 yards enforced at KC 14 - No Play. #NEvsKC,1,2017-09-07,Chiefs,Chiefs,Chiefs vs. Patriots 2017-09-07,


In [97]:
df1 = tweets_gametitle.groupby('Game Title Date')['date','source'].count()

In [100]:
df1 = df1.rename(columns={'date': 'Tweet_Count'})

In [102]:
df2 = winpct.groupby('Game Title Date')['date','Team1'].count()

In [104]:
df2 = df2.rename(columns={'date': 'ESPN_Play_Count'})

In [106]:
counts_compare = df1.join(df2)

In [110]:
counts_compare = counts_compare.drop(['source', 'Team1'], axis=1)

In [112]:
counts_compare['Count_Diff'] = counts_compare['Tweet_Count'] - counts_compare['ESPN_Play_Count']

In [136]:
%matplotlib inline
counts_compare.sort_values('Count_Diff', ascending=False)

Unnamed: 0_level_0,Tweet_Count,ESPN_Play_Count,Count_Diff
Game Title Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Broncos vs. Bills 2017-09-24,449,185,264
Giants vs. 49ers 2017-11-12,424,161,263
Dolphins vs. Bills 2017-12-17,430,175,255
Lions vs. Saints 2017-10-15,451,199,252
Redskins vs. Seahawks 2017-11-05,435,189,246
Bears vs. Ravens 2017-10-15,448,203,245
49ers vs. Cardinals 2017-10-01,479,235,244
Raiders vs. Bills 2017-10-29,408,170,238
Cardinals vs. 49ers 2017-11-05,428,191,237
Redskins vs. Saints 2017-11-19,423,188,235


In [146]:
BroncosBillsTweets = tweets_gametitle.loc[tweets_gametitle['Game Title Date'] == 'Broncos vs. Bills  2017-09-24']

In [156]:
BroncosBillsPct = winpct.loc[winpct['Game Title Date'] == 'Broncos vs. Bills  2017-09-24'][['playId','text','homeWinPercentage']]

In [157]:
BroncosBillsPct.head()

Unnamed: 0,playId,text,homeWinPercentage
25441,40095158336,S.Hauschka kicks 65 yards from BUF 35 to end z...,0.463
25442,40095158355,(15:00) T.Siemian pass short right to V.Green ...,0.5
25443,40095158383,"(14:27) PENALTY on DEN-A.Janovich, False Start...",0.508
25444,400951583106,(14:11) (Shotgun) T.Siemian pass short right t...,0.514
25445,400951583134,(13:33) (Shotgun) C.Anderson up the middle to ...,0.536


In [203]:
BroncosBillsPct['text30'] = BroncosBillsPct['text'].apply(lambda x: x[:30])

In [204]:
BroncosBillsTweets['text30'] = BroncosBillsTweets['text'].apply(lambda x: x[:30])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [175]:
BroncosBillsPct.loc[25441]['text30']

'S.Hauschka kicks 65 yards from'

In [178]:
BroncosBillsTweets.loc[4365]['text30']

'S.Hauschka kicks 65 yards from'

In [200]:
BroncosBillsTweets = BroncosBillsTweets.drop_duplicates(subset=['text'])

In [205]:
print(len(BroncosBillsTweets['text30']))
print(len(BroncosBillsTweets['text30'].unique()))

240
225


In [206]:
BroncosBillsPct.merge(BroncosBillsTweets, how='left', on='text30')

Unnamed: 0,playId,text_x,homeWinPercentage,text30,UTC_Datetime,date,time,screen_name,source,text_y,count,date_fromutc,Team1,Team2,Game Title Date
0,40095158336,S.Hauschka kicks 65 yards from BUF 35 to end z...,0.463,S.Hauschka kicks 65 yards from,2017-09-24 17:03:00,2017-09-24,17:03:00,broncos_pbp,Broncos Play By Play,S.Hauschka kicks 65 yards from BUF 35 to end z...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
1,40095158336,S.Hauschka kicks 65 yards from BUF 35 to end z...,0.463,S.Hauschka kicks 65 yards from,2017-09-24 17:50:02,2017-09-24,17:50:02,broncos_pbp,Broncos Play By Play,S.Hauschka kicks 65 yards from BUF 35 to DEN 0...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
2,40095158336,S.Hauschka kicks 65 yards from BUF 35 to end z...,0.463,S.Hauschka kicks 65 yards from,2017-09-24 18:19:02,2017-09-24,18:19:02,broncos_pbp,Broncos Play By Play,S.Hauschka kicks 65 yards from BUF 35 to end z...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
3,40095158336,S.Hauschka kicks 65 yards from BUF 35 to end z...,0.463,S.Hauschka kicks 65 yards from,2017-09-24 19:10:03,2017-09-24,19:10:03,broncos_pbp,Broncos Play By Play,S.Hauschka kicks 65 yards from BUF 35 to end z...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
4,40095158355,(15:00) T.Siemian pass short right to V.Green ...,0.500,(15:00) T.Siemian pass short r,2017-09-24 17:05:00,2017-09-24,17:05:00,broncos_pbp,Broncos Play By Play,(15:00) T.Siemian pass short right to https://...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
5,40095158355,(15:00) T.Siemian pass short right to V.Green ...,0.500,(15:00) T.Siemian pass short r,2017-09-24 17:05:00,2017-09-24,17:05:00,bills_pbp,Bills Play By Play,(15:00) T.Siemian pass short right to https://...,1.0,2017-09-24,Bills,Bills,Broncos vs. Bills 2017-09-24
6,40095158383,"(14:27) PENALTY on DEN-A.Janovich, False Start...",0.508,(14:27) PENALTY on DEN-A.Janov,2017-09-24 17:05:01,2017-09-24,17:05:01,broncos_pbp,Broncos Play By Play,(14:27) PENALTY on DEN-A.Janovich False Start ...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
7,400951583106,(14:11) (Shotgun) T.Siemian pass short right t...,0.514,(14:11) (Shotgun) T.Siemian pa,2017-09-24 17:06:01,2017-09-24,17:06:01,broncos_pbp,Broncos Play By Play,(14:11) (Shotgun) T.Siemian pass short right t...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
8,400951583134,(13:33) (Shotgun) C.Anderson up the middle to ...,0.536,(13:33) (Shotgun) C.Anderson u,2017-09-24 17:06:02,2017-09-24,17:06:02,broncos_pbp,Broncos Play By Play,(13:33) (Shotgun) C.Anderson up the middle to ...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24
9,400951583155,"(13:02) R.Dixon punts 37 yards to BUF 39, Cent...",0.545,(13:02) R.Dixon punts 37 yards,2017-09-24 17:07:00,2017-09-24,17:07:00,broncos_pbp,Broncos Play By Play,(13:02) R.Dixon punts 37 yards to BUF 39 Cente...,1.0,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24


In [207]:
BroncosBillsTweets

Unnamed: 0,UTC_Datetime,date,time,screen_name,source,text,count,date_fromutc,Team1,Team2,Game Title Date,text30
4365,2017-09-24 17:03:00,2017-09-24,17:03:00,broncos_pbp,Broncos Play By Play,S.Hauschka kicks 65 yards from BUF 35 to end z...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,S.Hauschka kicks 65 yards from
4366,2017-09-24 17:05:00,2017-09-24,17:05:00,broncos_pbp,Broncos Play By Play,(15:00) T.Siemian pass short right to https://...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(15:00) T.Siemian pass short r
4367,2017-09-24 17:05:01,2017-09-24,17:05:01,broncos_pbp,Broncos Play By Play,(14:27) PENALTY on DEN-A.Janovich False Start ...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(14:27) PENALTY on DEN-A.Janov
4368,2017-09-24 17:06:01,2017-09-24,17:06:01,broncos_pbp,Broncos Play By Play,(14:11) (Shotgun) T.Siemian pass short right t...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(14:11) (Shotgun) T.Siemian pa
4369,2017-09-24 17:06:02,2017-09-24,17:06:02,broncos_pbp,Broncos Play By Play,(13:33) (Shotgun) C.Anderson up the middle to ...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(13:33) (Shotgun) C.Anderson u
4370,2017-09-24 17:07:00,2017-09-24,17:07:00,broncos_pbp,Broncos Play By Play,(13:02) R.Dixon punts 37 yards to BUF 39 Cente...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(13:02) R.Dixon punts 37 yards
4371,2017-09-24 17:07:02,2017-09-24,17:07:02,broncos_pbp,Broncos Play By Play,(12:52) L.McCoy left tackle to BUF 42 for no g...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(12:52) L.McCoy left tackle to
4372,2017-09-24 17:08:00,2017-09-24,17:08:00,broncos_pbp,Broncos Play By Play,(12:19) (Shotgun) T.Taylor sacked at BUF 35 fo...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,(12:19) (Shotgun) T.Taylor sac
4373,2017-09-24 17:09:02,2017-09-24,17:09:02,broncos_pbp,Broncos Play By Play,1/2 (11:44) (Shotgun) T.Taylor pass short left...,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,1/2 (11:44) (Shotgun) T.Taylor
4374,2017-09-24 17:09:03,2017-09-24,17:09:03,broncos_pbp,Broncos Play By Play,2/2 BUF 31. #BUFvsDEN,1,2017-09-24,Broncos,Broncos,Broncos vs. Bills 2017-09-24,2/2 BUF 31. #BUFvsDEN


In [209]:
BroncosBillsPct.to_csv('BB_Pct.csv')

In [210]:
BroncosBillsTweets.to_csv('BB_Tweets.csv')