# Delta Practice

### File Read In

In [107]:
import pandas as pd
from datetime import timedelta

# Input file locations.
# NOTE(review): these are absolute paths on one machine; the notebook will not
# run elsewhere without editing them.
filepath1 = r'/Users/cartersocha/Downloads/instgramHashtagCounts.xlsx'
filepath2 = r'/Users/cartersocha/Downloads/tweetCountTest.xlsx'
filepath3 = r'/Users/cartersocha/Desktop/ReleaseData.xlsx'
filepath4 = r'/Users/cartersocha/Downloads/instgramAccountCounts.xlsx'
filepath5 = r'/Users/cartersocha/Downloads/redditCountTest.xlsx'
filepath6 = r'/Users/cartersocha/Downloads/redditCommentCombo.csv'
filepath8 = r'/Users/cartersocha/Desktop/ReleaseData.xlsx'
filepath10 = r'/Users/cartersocha/Downloads/googleDataset.csv'

# Single-sheet Excel workbooks (first sheet is read by default).
instaHashtagDf = pd.read_excel(filepath1)
dailyTweetDf = pd.read_excel(filepath2)
igAccountDf = pd.read_excel(filepath4)
redditSubsDf = pd.read_excel(filepath5)

# Two named sheets taken from the same ReleaseData workbook.
releaseDf = pd.read_excel(filepath3, sheet_name="ShowInfoEndStart")
releaseDateDf = pd.read_excel(filepath8, sheet_name="ReleaseDateData")

# CSV inputs.
redditCommentsDf = pd.read_csv(filepath6)
googleDailyData = pd.read_csv(filepath10)

### Functions

In [108]:
def DateTimeConvert(dateDf, dateColumn):
    """Convert one column of a DataFrame to datetimes, in place.

    Parameters
    ----------
    dateDf : pandas.DataFrame
        Frame holding the column to convert. It is modified in place.
    dateColumn : str
        Name of the column passed through ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        The same ``dateDf`` object, returned for convenient reassignment.
    """
    parsedDates = pd.to_datetime(dateDf[dateColumn])
    dateDf[dateColumn] = parsedDates
    return dateDf

In [109]:
def DiffMaker(fillnaDf, valueColumn, dateColumn):
    """Add a ``diffs`` column holding the per-show change in ``valueColumn``.

    Rows are ordered by ``TvShow`` and ``dateColumn``, each show's values are
    differenced against the previous row of the same show, and the original
    index order is then restored. The first row of every show has no
    predecessor, so its difference is filled with 0.

    Parameters
    ----------
    fillnaDf : pandas.DataFrame
        Frame containing ``TvShow``, ``valueColumn`` and ``dateColumn``.
        It is modified in place.
    valueColumn : str
        Column whose row-to-row change is computed.
    dateColumn : str
        Column that defines the ordering within each show.

    Returns
    -------
    pandas.DataFrame
        The same ``fillnaDf`` object with the ``diffs`` column added.
    """
    orderKeys = ['TvShow', dateColumn]
    fillnaDf.sort_values(orderKeys, inplace=True)

    valuesByShow = fillnaDf.groupby(['TvShow'])[valueColumn]
    changeFromPrevious = valuesByShow.transform(lambda showValues: showValues.diff())
    fillnaDf['diffs'] = changeFromPrevious.fillna(0)

    # Undo the temporary sort so the caller gets rows back in index order.
    fillnaDf.sort_index(inplace=True)
    return fillnaDf

In [110]:
def RemoveData(release, showDf):
    """Keep only the rows of ``showDf`` that fall inside each show's window.

    For every row of ``release``, the rows of ``showDf`` with the same
    ``TvShow`` and a ``RunDate`` between that row's ``Release Date`` and
    ``90DayDate`` (both ends inclusive) are selected. The selections are
    stacked in the order the shows appear in ``release``.

    Parameters
    ----------
    release : pandas.DataFrame
        Must contain the columns ``TvShow``, ``Release Date`` and
        ``90DayDate``. Any index is accepted.
    showDf : pandas.DataFrame
        Must contain the columns ``TvShow`` and ``RunDate``. Not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index. When ``release`` has no rows
        the result is an empty frame that still carries ``showDf``'s columns.
    """
    # Iterate over values rather than looking rows up by integer label, so a
    # filtered or re-indexed ``release`` frame works as well.
    windows = zip(release['TvShow'], release['Release Date'], release['90DayDate'])

    pieces = []
    for show, firstDate, secondDate in windows:
        showRows = showDf[showDf['TvShow'] == show]
        pieces.append(showRows[showRows['RunDate'].between(firstDate, secondDate)])

    if not pieces:
        # pd.concat raises on an empty list; return an empty, correctly
        # shaped frame instead.
        return showDf.iloc[0:0].reset_index(drop=True)

    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
    # One concat at the end also avoids re-copying the accumulated rows on
    # every loop iteration.
    return pd.concat(pieces, ignore_index=True)

In [111]:
def MergeDfs(mainDf, secondDf, columnName):
    """Outer-join two DataFrames on one or more shared columns.

    Parameters
    ----------
    mainDf : pandas.DataFrame
        Left side of the join.
    secondDf : pandas.DataFrame
        Right side of the join.
    columnName : str or list of str
        Key column(s) present in both frames.

    Returns
    -------
    pandas.DataFrame
        A new frame containing every key from either input; values missing
        on one side are left as NaN.
    """
    return mainDf.merge(secondDf, on=columnName, how='outer')

In [112]:
import numpy as np

def NegativeDiffs(diffDf, columnName):
    """Add a ``zeroedDiffs`` column: ``columnName`` with negatives set to 0.

    Parameters
    ----------
    diffDf : pandas.DataFrame
        Frame holding ``columnName``. It is modified in place.
    columnName : str
        Column whose negative entries are replaced.

    Returns
    -------
    pandas.DataFrame
        The same ``diffDf`` object; ``columnName`` itself is left unchanged.
    """
    sourceValues = diffDf[columnName]
    isNegative = sourceValues < 0
    diffDf['zeroedDiffs'] = np.where(isNegative, 0, sourceValues)
    return diffDf

In [113]:
def SumSocialMedia(sumDf, columnName, trigger):
    """Aggregate ``sumDf`` per group, either summing or counting.

    Parameters
    ----------
    sumDf : pandas.DataFrame
        Frame to aggregate.
    columnName : str or list of str
        Grouping column(s); they stay as regular columns in the result.
    trigger : int
        ``1`` sums the remaining columns within each group; any other value
        counts the non-null entries instead.

    Returns
    -------
    pandas.DataFrame
        One row per group.
    """
    groups = sumDf.groupby(columnName, as_index=False)
    aggregated = groups.sum() if trigger == 1 else groups.count()
    return pd.DataFrame(aggregated)

### Data Transformations

##### Release Data

In [114]:
# Coerce the release date to datetime *before* doing date arithmetic on it,
# so the offset below also works if the sheet delivered the column as text.
releaseDf = DateTimeConvert(releaseDf, 'Release Date')

# End of the observation window used later by RemoveData.
# NOTE(review): the column is named '90DayDate' but the offset is 120 days -
# confirm which of the two is intended before renaming or changing either.
releaseDf['90DayDate'] = releaseDf['Release Date'] + pd.DateOffset(days=120)
releaseDf = DateTimeConvert(releaseDf, '90DayDate')

##### Instagram Hashtag Data

In [115]:
# Parse run dates and compute per-show deltas on the untrimmed history.
# The diff is taken before the window filter, so rows at the start of a
# window can be differenced against earlier readings when those exist.
instaHashtagDf = DiffMaker(
    DateTimeConvert(instaHashtagDf, 'RunDate'),
    'HashtagValue',
    'RunDate',
)

# Trim to each show's release window, then zero out negative deltas.
instaHashDf = NegativeDiffs(RemoveData(releaseDf, instaHashtagDf), 'diffs')

# Common output columns shared by every source.
instaHashDf['SocialMediaSource'] = 'InstagramHashtag'
instaHashDf['SocialMediaValue'] = instaHashDf['zeroedDiffs']

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Twitter Data

In [116]:
# Parse run dates and keep only rows inside each show's release window.
dailyTweetDf = RemoveData(releaseDf, DateTimeConvert(dailyTweetDf, 'RunDate'))

# Daily Twitter value = tweets + retweets (NaN in either operand stays NaN).
dailyTweetDf['SocialMediaValue'] = dailyTweetDf['TweetCount'].add(dailyTweetDf['RetweetCount'])
dailyTweetDf['SocialMediaSource'] = 'Tweets'

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Reddit Comment Data

In [117]:
# Parse run dates and keep only rows inside each show's release window.
redditCommentsDf = DateTimeConvert(redditCommentsDf, 'RunDate')
redCommentDf = RemoveData(releaseDf, redditCommentsDf)

# Per-row value = number of comments + score.
redCommentDf['SocialMediaValue'] = redCommentDf['NumComments'].add(redCommentDf['score'])

# Collapse to one summed value per show and day (trigger 1 selects the sum branch).
redCommentDf = redCommentDf.loc[:, ['TvShow', 'RunDate', 'SocialMediaValue']]
iRedCommentDf = SumSocialMedia(redCommentDf, ['TvShow', 'RunDate'], 1)
iRedCommentDf['SocialMediaSource'] = 'RedditComments'

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Instagram Account Data

In [118]:
# Parse run dates and compute per-show deltas of the account counts on the
# untrimmed history (the window filter is applied afterwards).
igAccountDf = DiffMaker(
    DateTimeConvert(igAccountDf, 'RunDate'),
    'IgAccountCounts',
    'RunDate',
)

# Trim to each show's release window, then zero out negative deltas.
instaAccDf = NegativeDiffs(RemoveData(releaseDf, igAccountDf), 'diffs')

# Common output columns shared by every source.
instaAccDf = instaAccDf.assign(
    SocialMediaSource='InstagramAccount',
    SocialMediaValue=instaAccDf['zeroedDiffs'],
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Reddit Subscriber Data

In [119]:
# Parse run dates and compute per-show deltas of the subscriber counts on the
# untrimmed history (the window filter is applied afterwards).
redditSubsDf = DiffMaker(
    DateTimeConvert(redditSubsDf, 'RunDate'),
    'RedditSubs',
    'RunDate',
)

# Trim to each show's release window, then zero out negative deltas.
redSubDf = NegativeDiffs(RemoveData(releaseDf, redditSubsDf), 'diffs')

# Common output columns shared by every source.
redSubDf = redSubDf.assign(
    SocialMediaSource='RedditSubscribers',
    SocialMediaValue=redSubDf['zeroedDiffs'],
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Google Data

In [120]:
# Parse run dates and keep only rows inside each show's release window.
googleDailyData = DateTimeConvert(googleDailyData, 'RunDate')
googDf1 = RemoveData(releaseDf, googleDailyData)

# Unlike the count-based sources, no DiffMaker step is applied here:
# 'GoogleValue' itself is clamped at zero and used as the value.
googDf1 = NegativeDiffs(googDf1, 'GoogleValue')
googDf1 = googDf1.assign(
    SocialMediaSource='Google',
    SocialMediaValue=googDf1['zeroedDiffs'],
)

  bigDf = bigDf.append(newdf,ignore_index=True)


##### Release Date Data

In [121]:
releaseDateDf = DateTimeConvert(releaseDateDf, 'EpisodeReleaseDate')

# trigger 0 selects the count branch: after grouping, every remaining column
# (including 'EpisodeNumber') holds the number of rows per show and date.
iReleaseData = SumSocialMedia(releaseDateDf, ['TvShow', 'EpisodeReleaseDate'], 0)

# Expose the count and the date under the names used by the other frames.
iReleaseData = iReleaseData.assign(
    DailyReleaseCount=iReleaseData['EpisodeNumber'],
    RunDate=iReleaseData['EpisodeReleaseDate'],
)

##### Join the Data

##### Time Value Datasets

In [122]:
# Every per-source frame is reduced to the same four columns so they can be
# stacked into one long table.
timeValueColumns = ['TvShow', 'RunDate', 'SocialMediaValue', 'SocialMediaSource']

instaAccDf = instaAccDf[timeValueColumns]
redSubDf = redSubDf[timeValueColumns]
dailyTweetDf = dailyTweetDf[timeValueColumns]
instaHashDf = instaHashDf[timeValueColumns]
googDf1 = googDf1[timeValueColumns]

# The Reddit comment frame (iRedCommentDf) is not trimmed here; it was
# already reduced to these columns when it was aggregated.
iReleaseData = iReleaseData[['TvShow', 'RunDate', 'DailyReleaseCount']]

In [123]:
# Stack all per-source frames into one long table.
uberDf2 = pd.concat([instaAccDf, redSubDf, dailyTweetDf, instaHashDf, googDf1, iRedCommentDf])

# Attach the show-level release metadata, then the per-day episode counts.
# Both joins are outer joins, so unmatched rows from either side are kept
# with NaN in the columns of the other side.
uberDf3 = MergeDfs(uberDf2, releaseDf, 'TvShow')
uberDf3 = MergeDfs(uberDf3, iReleaseData, ['TvShow', 'RunDate'])

# Keep only rows whose 'Ignore' flag equals 1; rows without release metadata
# have NaN there and are dropped by this comparison as well.
uberDf3 = uberDf3[uberDf3['Ignore'] == 1]

# NOTE(review): fillna(0) is applied to every column, including the date
# columns - a missing RunDate would be turned into 0 and break the
# subtraction below. Confirm that cannot happen for the shows kept above.
uberDf3 = uberDf3.fillna(0)

# Whole days elapsed since release. Read the day count straight from the
# timedelta instead of parsing its string form, which breaks as soon as a
# value carries a time-of-day component.
uberDf3['PostReleaseDay'] = (uberDf3['RunDate'] - uberDf3['Release Date']).dt.days.astype(int)

### Output - individuals & uber dataset

In [124]:
# Order rows by show, then date, then source.
sortColumns = ['TvShow', 'RunDate', 'SocialMediaSource']
uberDf3 = uberDf3.sort_values(by=sortColumns)

In [125]:
# Keep rows whose PostReleaseDay is at most 75.
uberdf4 = uberDf3.loc[uberDf3['PostReleaseDay'].le(75)]

# NOTE(review): 'filepath2' was the tweet input path in the file-read cell;
# it is rebound here to the output location.
filepath2 = r'/Users/cartersocha/Downloads/uberDataset2.csv'

# Written with the default settings, so the frame's index is included as the
# first CSV column.
uberdf4.to_csv(filepath2)

In [127]:
# Sanity check: column-wise maximum per show, first 15 shows.
uberdf4.groupby(by='TvShow').max().iloc[:15]

Unnamed: 0_level_0,RunDate,SocialMediaValue,SocialMediaSource,Stream,EpisodeCount,SeasonNumber,ReleaseCadence,Release Date,Ignore,ShowStatus,90DayDate,DailyReleaseCount,PostReleaseDay
TvShow,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ABlackLadySketchShow,2022-06-22,1402.0,Tweets,HBOMax,6.0,3.0,Weekly,2022-04-08,1.0,Renewed,2022-08-06,1.0,75
AboutLastNighthbo,2022-04-26,1.0,InstagramHashtag,HBOMax,8.0,1.0,Binge,2022-02-10,1.0,Undecided,2022-06-10,8.0,75
Archive81,2022-03-30,7238.0,Tweets,Netflix,8.0,1.0,Binge,2022-01-14,1.0,Cancelled,2022-05-14,7.0,75
Atlantafx,2022-06-08,94942.0,Tweets,Hulu,10.0,3.0,Hybrid,2022-03-25,1.0,Completed,2022-07-23,2.0,75
Barry,2022-07-08,82471.0,Tweets,HBOMax,8.0,3.0,Weekly,2022-04-24,1.0,Renewed,2022-08-22,1.0,75
BelAirPeacock,2022-04-29,56622.0,Tweets,Peacock,10.0,1.0,Hybrid,2022-02-13,1.0,Renewed,2022-06-13,3.0,75
BlingEmpire,2022-07-27,8471.0,Tweets,Netflix,8.0,2.0,Binge,2022-05-13,1.0,Renewed,2022-09-10,8.0,75
Bridgerton,2022-06-08,463312.0,Tweets,Netflix,8.0,2.0,Binge,2022-03-25,1.0,Renewed,2022-07-23,8.0,75
ConversationsWithFriends,2022-07-29,22927.0,Tweets,Hulu,12.0,1.0,Binge,2022-05-15,1.0,Cancelled,2022-09-12,12.0,75
DriveToSurvive,2022-05-25,157711.0,Tweets,Netflix,12.0,4.0,Binge,2022-03-11,1.0,Renewed,2022-07-09,12.0,75
