# Delta Practice

### File Read In

In [64]:
import pandas as pd

# Locations of the source files. These are absolute paths on the author's machine.
filepath1 = r'/Users/cartersocha/Downloads/instgramHashtagCounts.xlsx'
filepath2 = r'/Users/cartersocha/Downloads/tweetCountTest.xlsx'
filepath3 = r'/Users/cartersocha/Desktop/ReleaseData.xlsx'
filepath4 = r'/Users/cartersocha/Downloads/instgramAccountCounts.xlsx'
filepath5 = r'/Users/cartersocha/Downloads/redditCountTest.xlsx'
filepath6 = r'/Users/cartersocha/Downloads/redditCountFinal.txt'
filepath7 = r'/Users/cartersocha/Downloads/googleTvCount.xlsx'

# One DataFrame per source workbook.
instaHashtagDf = pd.read_excel(filepath1)
dailyTweetDf = pd.read_excel(filepath2)
# The release workbook is read from its "ShowInfoEndStart" sheet.
releaseDf = pd.read_excel(filepath3, sheet_name="ShowInfoEndStart")
igAccountDf = pd.read_excel(filepath4)
redditSubsDf = pd.read_excel(filepath5)
# The Reddit comment file (filepath6) is not loaded: its read is disabled.
#redditCommentsDf = pd.read_excel(filepath6)
googleTrendsDf = pd.read_excel(filepath7)

### Functions

In [65]:
def DateTimeConvert(dateDf, dateColumn):
    """Parse one column of ``dateDf`` with ``pd.to_datetime``.

    The column is overwritten on the frame that was passed in, and that same
    frame object is returned.

    Parameters
    ----------
    dateDf : pandas.DataFrame
        Frame that owns the column to parse.
    dateColumn : str
        Name of the column to convert.

    Returns
    -------
    pandas.DataFrame
        ``dateDf`` itself, with ``dateColumn`` replaced by its parsed values.
    """
    parsedDates = pd.to_datetime(dateDf[dateColumn])
    dateDf[dateColumn] = parsedDates
    return dateDf

In [66]:
def DiffMaker(fillnaDf, valueColumn, dateColumn, groupColumn='TvShow'):
    """Add a ``diffs`` column: the change in ``valueColumn`` between consecutive rows of each group.

    Rows are ordered by ``groupColumn`` and ``dateColumn`` before differencing,
    so each diff is taken against the previous dated row of the same group.
    Positions where the diff is missing (for example the first row of every
    group) are filled with 0. The frame is then put back into index order.

    The frame passed in is modified in place and also returned.

    Parameters
    ----------
    fillnaDf : pandas.DataFrame
        Frame containing ``groupColumn``, ``dateColumn`` and ``valueColumn``.
    valueColumn : str
        Column whose row-to-row change is computed.
    dateColumn : str
        Column that defines the order of rows within a group.
    groupColumn : str, default 'TvShow'
        Column identifying the groups that are differenced independently.

    Returns
    -------
    pandas.DataFrame
        ``fillnaDf`` itself, with the extra ``diffs`` column.
    """
    # Differencing is order dependent, so sort by group and date first.
    fillnaDf.sort_values([groupColumn, dateColumn], inplace=True)

    # groupby().diff() keeps the original index, so the result aligns on assignment.
    fillnaDf['diffs'] = fillnaDf.groupby([groupColumn])[valueColumn].diff().fillna(0)

    # Restore index order after the temporary sort.
    fillnaDf.sort_index(inplace=True)

    return fillnaDf

In [67]:
def RemoveData(release, showDf):
    """Keep only the rows of ``showDf`` that fall inside each show's release window.

    For every row of ``release`` the rows of ``showDf`` with the same
    ``'TvShow'`` and a ``'RunDate'`` between ``'Release Date'`` and
    ``'90DayDate'`` (both ends inclusive) are selected. The selections are
    stacked in the order of ``release`` and given a fresh 0..n-1 index.

    Parameters
    ----------
    release : pandas.DataFrame
        One row per show with columns ``'TvShow'``, ``'Release Date'`` and
        ``'90DayDate'``.
    showDf : pandas.DataFrame
        Observations with at least ``'TvShow'`` and ``'RunDate'`` columns.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``showDf``. When nothing is selected, an empty
        frame that still has the columns of ``showDf`` is returned.
    """
    # Iterate positionally so the index labels of `release` do not matter.
    windows = zip(release['TvShow'], release['Release Date'], release['90DayDate'])

    keptFrames = []
    for show, firstDate, secondDate in windows:
        inWindow = (showDf['TvShow'] == show) & showDf['RunDate'].between(firstDate, secondDate)
        selected = showDf[inWindow]
        if not selected.empty:
            keptFrames.append(selected)

    if not keptFrames:
        # pd.concat rejects an empty list; return an empty frame with the same columns.
        return showDf.iloc[0:0].reset_index(drop=True)

    # Concatenate once: DataFrame.append was removed in pandas 2.0, and growing
    # a frame inside the loop copies all accumulated rows on every iteration.
    return pd.concat(keptFrames, ignore_index=True)

In [68]:
def MergeDfs(mainDf, secondDf):
    """Outer-join two frames on their ``'TvShow'`` column.

    Shows present in only one of the inputs are kept; the columns coming from
    the other input are left missing for them.

    Parameters
    ----------
    mainDf : pandas.DataFrame
        Left side of the join.
    secondDf : pandas.DataFrame
        Right side of the join.

    Returns
    -------
    pandas.DataFrame
        The joined frame.
    """
    return mainDf.merge(secondDf, how='outer', on=['TvShow'])

In [69]:
import numpy as np

def NegativeDiffs(diffDf,columnName):
    """Replace every negative entry of ``columnName`` with 0.

    Entries that are not below zero are kept as they are. The column is
    overwritten on the frame that was passed in, and that frame is returned.

    Parameters
    ----------
    diffDf : pandas.DataFrame
        Frame that owns the column.
    columnName : str
        Column to clean.

    Returns
    -------
    pandas.DataFrame
        ``diffDf`` itself.
    """
    currentValues = diffDf[columnName]
    belowZero = currentValues < 0
    diffDf[columnName] = np.where(belowZero, 0, currentValues)
    return diffDf

In [70]:
def SumSocialMedia(sumDf,columnName):
    """Total every numeric column of ``sumDf`` per value of ``columnName``.

    Parameters
    ----------
    sumDf : pandas.DataFrame
        Frame to aggregate.
    columnName : str
        Column to group by. It is kept as a regular column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of ``columnName``, with the grouping column
        followed by the per-group sums of the numeric columns. Non-numeric
        columns (dates, text labels) are left out.
    """
    # numeric_only=True makes the dropping of date/text columns explicit.
    # Without it, pandas 2.x tries to sum those columns and fails on datetimes.
    summarizedDf = sumDf.groupby(columnName, as_index=False).sum(numeric_only=True)

    return summarizedDf

### Data Transformations

##### Release Data

In [71]:
# Parse the release date first so the date arithmetic below always runs on datetimes.
releaseDf = DateTimeConvert(releaseDf,'Release Date')

# End of the observation window: 90 days after the release date.
# Adding a DateOffset to a datetime column yields a datetime column, so
# '90DayDate' needs no separate conversion.
releaseDf['90DayDate'] = releaseDf['Release Date'] + pd.DateOffset(days=90)

##### Instagram Hashtag Data

In [72]:
# Parse the run dates, then add the per-show change in the hashtag counter ('diffs').
instaHashtagDf = DiffMaker(
    DateTimeConvert(instaHashtagDf, 'RunDate'),
    'HashtagValue',
    'RunDate',
)

# Row-level series: rows inside each show's release window, negative changes set to 0.
instaHashDf = NegativeDiffs(RemoveData(releaseDf, instaHashtagDf), 'diffs')
instaHashDf = instaHashDf.assign(
    SocialMediaSource='InstagramHashtag',
    SocialMediaValue=instaHashDf['diffs'],
)

# Per-show totals over the window.
iHashtagDf = SumSocialMedia(instaHashDf, 'TvShow')
iHashtagDf = iHashtagDf.assign(
    SocialMediaValue=iHashtagDf['diffs'],
    SocialMediaSource='InstagramHashtag',
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Twitter Data

In [73]:
# Parse the run dates and keep only rows inside each show's release window.
dailyTweetDf = RemoveData(releaseDf, DateTimeConvert(dailyTweetDf, 'RunDate'))

# Row-level value: tweets plus retweets.
dailyTweetDf = dailyTweetDf.assign(
    SocialMediaValue=dailyTweetDf['TweetCount'] + dailyTweetDf['RetweetCount'],
    SocialMediaSource='Tweets',
)

# Per-show totals; the source label is attached again after aggregating.
iTweetCountDf = SumSocialMedia(dailyTweetDf, 'TvShow').assign(SocialMediaSource='Tweets')

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Reddit Comment Data

# NOTE(review): redditCommentsDf is never created in the visible notebook. The
# read of filepath6 in the File Read In cell is commented out, so this cell
# cannot run as-is.
# NOTE(review): the date column is spelled 'runDate' here, while RemoveData
# filters on 'RunDate'. Confirm the column name in the Reddit comment data.
redditCommentsDf = DateTimeConvert(redditCommentsDf,'runDate')

# Keep only rows inside each show's release window.
redCommentDf = RemoveData(releaseDf, redditCommentsDf)


# Per-show totals.
iRedCommentDf = SumSocialMedia(redCommentDf, 'TvShow')
# Build the per-show value from the aggregated columns. Using the row-level
# frame (redCommentDf) here would align single rows to the per-show rows by
# index and assign unrelated values.
iRedCommentDf['SocialMediaValue'] = iRedCommentDf['NumComments'] + iRedCommentDf['score']
iRedCommentDf['SocialMediaSource'] = 'RedditComments'

##### Instagram Account Data

In [74]:
# Parse the run dates, then add the per-show change in the account counter ('diffs').
igAccountDf = DiffMaker(
    DateTimeConvert(igAccountDf, 'RunDate'),
    'IgAccountCounts',
    'RunDate',
)

# Row-level series: rows inside each show's release window, negative changes set to 0.
instaAccDf = NegativeDiffs(RemoveData(releaseDf, igAccountDf), 'diffs')
instaAccDf = instaAccDf.assign(
    SocialMediaSource='InstagramAccount',
    SocialMediaValue=instaAccDf['diffs'],
)

# Per-show totals over the window.
iAccountDf = SumSocialMedia(instaAccDf, 'TvShow')
iAccountDf = iAccountDf.assign(
    SocialMediaValue=iAccountDf['diffs'],
    SocialMediaSource='InstagramAccount',
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Reddit Subscriber Data

In [75]:
# Parse the run dates, then add the per-show change in the subscriber counter ('diffs').
redditSubsDf = DiffMaker(
    DateTimeConvert(redditSubsDf, 'RunDate'),
    'RedditSubs',
    'RunDate',
)

# Row-level series: rows inside each show's release window, negative changes set to 0.
redSubDf = NegativeDiffs(RemoveData(releaseDf, redditSubsDf), 'diffs')
redSubDf = redSubDf.assign(
    SocialMediaSource='RedditSubscribers',
    SocialMediaValue=redSubDf['diffs'],
)

# Per-show totals over the window.
iRedSubDf = SumSocialMedia(redSubDf, 'TvShow')
iRedSubDf = iRedSubDf.assign(
    SocialMediaValue=iRedSubDf['diffs'],
    SocialMediaSource='RedditSubscribers',
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Google Data

In [76]:
googleTrendsDf = DateTimeConvert(googleTrendsDf, 'RunDate')

# Rows inside each show's release window, with negative 'GoogleValue' entries set to 0.
googDf = NegativeDiffs(RemoveData(releaseDf, googleTrendsDf), 'GoogleValue')
googDf = googDf.assign(
    SocialMediaSource='Google',
    SocialMediaValue=googDf['GoogleValue'],
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Join the Data

##### Time Value Datasets

In [78]:
# Reduce every row-level source frame to the same four columns so they can be stacked.
instaAccDf, redSubDf, dailyTweetDf, instaHashDf, googDf = (
    sourceFrame[['TvShow','RunDate','SocialMediaValue','SocialMediaSource']]
    for sourceFrame in (instaAccDf, redSubDf, dailyTweetDf, instaHashDf, googDf)
)

In [88]:
# Stack the row-level series of all sources into one long frame.
uberDf2 = pd.concat(
    [instaAccDf, redSubDf, dailyTweetDf, instaHashDf, googDf],
    axis=0,
)

# Attach the release attributes of each show (outer join on 'TvShow').
uberDf3 = MergeDfs(uberDf2, releaseDf)
uberDf3

Unnamed: 0,TvShow,RunDate,SocialMediaValue,SocialMediaSource,Stream,EpisodeCount,SeasonNumber,ReleaseCadence,Release Date,90DayDate
0,ABlackLadySketchShow,2022-04-08,175.0,InstagramAccount,HBOMax,,,,2022-04-08,2022-07-07
1,ABlackLadySketchShow,2022-04-09,341.0,InstagramAccount,HBOMax,,,,2022-04-08,2022-07-07
2,ABlackLadySketchShow,2022-04-10,327.0,InstagramAccount,HBOMax,,,,2022-04-08,2022-07-07
3,ABlackLadySketchShow,2022-04-11,222.0,InstagramAccount,HBOMax,,,,2022-04-08,2022-07-07
4,ABlackLadySketchShow,2022-04-12,150.0,InstagramAccount,HBOMax,,,,2022-04-08,2022-07-07
...,...,...,...,...,...,...,...,...,...,...
18018,AboutLastNighthbo,2022-05-06,0.0,InstagramHashtag,HBOMax,,1.0,,2022-02-10,2022-05-11
18019,AboutLastNighthbo,2022-05-08,0.0,InstagramHashtag,HBOMax,,1.0,,2022-02-10,2022-05-11
18020,AboutLastNighthbo,2022-05-09,0.0,InstagramHashtag,HBOMax,,1.0,,2022-02-10,2022-05-11
18021,AboutLastNighthbo,2022-05-10,0.0,InstagramHashtag,HBOMax,,1.0,,2022-02-10,2022-05-11


##### Aggregation Datasets

In [80]:
# Reduce every per-show total frame to the same three columns so they can be stacked.
igAccount, redditSub, tweetComments, igHashtag = (
    totalsFrame[['TvShow','SocialMediaValue','SocialMediaSource']]
    for totalsFrame in (iAccountDf, iRedSubDf, iTweetCountDf, iHashtagDf)
)

##### Dataset Merging

In [81]:
# Long table of per-show totals: one row per show and source, stacked row-wise.
nuberData = pd.concat([igAccount, redditSub, tweetComments, igHashtag])
nuberData.head(5)

Unnamed: 0,TvShow,SocialMediaValue,SocialMediaSource
0,ABlackLadySketchShow,7394.0,InstagramAccount
1,Archive81,89.0,InstagramAccount
2,Atlantafx,20436.0,InstagramAccount
3,BelAirPeacock,134549.0,InstagramAccount
4,Bridgerton,1146861.0,InstagramAccount


In [82]:
# Wide table: release attributes, then the hashtag totals, then the tweet totals,
# each joined on 'TvShow'. Column names shared by both sides get _x / _y suffixes.
uberDf = MergeDfs(
    MergeDfs(releaseDf, iHashtagDf),
    iTweetCountDf,
)

In [83]:
# Preview the first five rows of the wide table.
uberDf.head(n=5)

Unnamed: 0,TvShow,Stream,EpisodeCount,SeasonNumber,ReleaseCadence,Release Date,90DayDate,HashtagValue,diffs,SocialMediaValue_x,SocialMediaSource_x,TweetCount,RetweetCount,SocialMediaValue_y,SocialMediaSource_y
0,ABlackLadySketchShow,HBOMax,,,,2022-04-08,2022-07-07,408351.0,464.0,464.0,InstagramHashtag,6700.0,6533.0,13233.0,Tweets
1,AboutLastNighthbo,HBOMax,,1.0,,2022-02-10,2022-05-11,334.0,1.0,1.0,InstagramHashtag,,,,
2,AdventureOfTheRing,HBOMax,,1.0,,2022-02-10,2022-05-11,14492.0,2.0,2.0,InstagramHashtag,2213.0,4444.0,6657.0,Tweets
3,AllTheOldKnives,Prime,,,,2022-04-08,2022-07-07,268196.0,2020.0,2020.0,InstagramHashtag,7416.0,5102.0,12518.0,Tweets
4,Archive81,Netflix,8.0,1.0,Binge,2022-01-14,2022-04-14,310066.0,982.0,982.0,InstagramHashtag,88380.0,48262.0,136642.0,Tweets


### Output - individuals & uber dataset

# Destination of the wide dataset. This reuses the name filepath2, which held
# the tweet workbook path in the File Read In cell.
filepath2 = r'/Users/cartersocha/Downloads/uberDataset.csv'

# Write the wide table as CSV with pandas' default options.
uberDf.to_csv(path_or_buf=filepath2)