In [5]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import openpyxl
import os
from googleapiclient.discovery import build
from IPython.display import JSON
from datetime import date
from datetime import datetime
import calendar
import isodate
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np

In [6]:
# The YouTube Data API key is read from the environment so that it is never
# stored in the notebook or committed to version control.
# NOTE(review): the key that used to be hardcoded here was published in plain
# text and should be revoked and replaced.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key."
    )
all_data = []
api_service_name = "youtube"
api_version = "v3"
# Create the API client used by every request helper below.
youtube = build(api_service_name, api_version, developerKey=api_key)

In [7]:
def requestData(youtube,channel_id):
    """Fetch the summary statistics of one YouTube channel.

    Parameters
    ----------
    youtube : API client exposing ``channels().list(...).execute()``.
    channel_id : str
        Id passed as the ``id`` parameter of the channels request.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns ``channelName``, ``startDate``,
        ``totalViews``, ``totalSub``, ``numofVideos`` and ``playListId``.
        Values are stored exactly as the API returned them (no conversion).

    Raises
    ------
    ValueError
        If the response contains no items for ``channel_id``.
    """
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id
    )
    response = request.execute()

    # A response without items used to surface as a KeyError or an unbound
    # `data` variable; fail with an explicit message instead.
    items = response.get('items') or []
    if not items:
        raise ValueError(f"No channel found for id {channel_id!r}")

    # Only the first returned channel is used.
    item = items[0]
    # Statistics are read with .get so a missing counter becomes None
    # instead of raising.
    statistics = item.get('statistics', {})
    data = {
        'channelName': item['snippet']['title'],
        'startDate': item['snippet']['publishedAt'],
        'totalViews': statistics.get('viewCount'),
        'totalSub': statistics.get('subscriberCount'),
        'numofVideos': statistics.get('videoCount'),
        'playListId': item['contentDetails']['relatedPlaylists']['uploads'],
    }
    return pd.DataFrame([data])

In [8]:
def get_video_ids(youtube, playlist_id):
    """Collect the video ids of every item in a playlist.

    Pages through ``playlistItems().list`` 50 items at a time, following
    ``nextPageToken`` until the response no longer carries one.

    Parameters
    ----------
    youtube : API client exposing ``playlistItems().list(...).execute()``.
    playlist_id : str
        Id of the playlist to enumerate.

    Returns
    -------
    list
        The ``contentDetails.videoId`` of each item, in response order.
    """
    video_ids = []
    next_page_token = None
    while True:
        # Only contentDetails is requested because videoId is the only field read.
        params = {
            'part': 'contentDetails',
            'playlistId': playlist_id,
            'maxResults': 50,
        }
        # The first request carries no page token at all.
        if next_page_token is not None:
            params['pageToken'] = next_page_token

        response = youtube.playlistItems().list(**params).execute()

        # A page without an 'items' key contributes nothing instead of raising.
        video_ids.extend(
            item['contentDetails']['videoId'] for item in response.get('items', [])
        )

        next_page_token = response.get('nextPageToken')
        if next_page_token is None:
            break

    return video_ids

In [9]:
def get_video_details(youtube,video_ids):
    """Fetch metadata and statistics for a list of videos.

    The ids are sent to ``videos().list`` in batches of 50, joined with commas.

    Parameters
    ----------
    youtube : API client exposing ``videos().list(...).execute()``.
    video_ids : list of str

    Returns
    -------
    pandas.DataFrame
        One row per returned video with the columns ``video_id``,
        ``channelTitle``, ``title``, ``publishedAt``, ``viewCount``,
        ``likeCount``, ``commentCount`` and ``duration``. A field missing
        from the response is stored as ``None``. The columns are present
        even when no video is returned.
    """
    # Fields to keep, grouped by the response section they live in.
    video_info_keep = {
        'snippet': ['channelTitle', 'title', 'publishedAt'],
        'statistics': ['viewCount', 'likeCount', 'commentCount'],
        'contentDetails': ['duration'],
    }
    columns = ['video_id'] + [
        field for fields in video_info_keep.values() for field in fields
    ]

    all_video_info = []
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()
        for video in response.get('items', []):
            video_info = {'video_id': video['id']}
            for section, fields in video_info_keep.items():
                # Only absent sections/fields fall back to None; any other
                # error is no longer swallowed by a bare except.
                section_data = video.get(section) or {}
                for field in fields:
                    video_info[field] = section_data.get(field)
            all_video_info.append(video_info)
    return pd.DataFrame(all_video_info, columns=columns)

In [10]:
def clean_data(df):
    """Return a cleaned copy of the video frame built by ``get_video_details``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``channelID``, ``video_id``, ``channelTitle``,
        ``title``, ``publishedAt``, ``viewCount``, ``likeCount``,
        ``commentCount`` and ``duration`` (ISO 8601 duration strings parsed
        with ``isodate.parse_duration``).

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) where:
        * ``publishedAt`` holds plain ``date`` values,
        * ``publishedDay`` holds the weekday name of the publication date,
        * ``durationMinutes`` holds the duration in minutes rounded to one
          decimal (NaN when the duration is missing),
        * ``viewCount``, ``likeCount``, ``commentCount`` and
          ``durationMinutes`` are numeric, with unparseable values as NaN.
    """
    def _duration_minutes(value):
        # Missing durations become NaN instead of crashing the parser.
        if pd.isna(value):
            return float('nan')
        return isodate.parse_duration(value).total_seconds() / 60

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    ## changes dates to days of week and from datetime to regular date
    published = pd.to_datetime(df['publishedAt'])
    df['publishedDay'] = published.dt.day_name()
    df['publishedAt'] = published.dt.date
    df = df[['channelID','video_id', 'channelTitle', 'title', 'publishedAt', 'publishedDay', 'viewCount','likeCount', 'commentCount', 'duration']].copy()

    ## changes the duration of the video into minutes
    # total_seconds() avoids astype('timedelta64[s]'), whose result type
    # differs between pandas versions.
    df['durationMinutes'] = (
        df['duration'].apply(_duration_minutes).astype(float).round(1)
    )

    ## changes final columns to numeric (column by column, not row by row)
    numeric_cols = ['viewCount','likeCount','commentCount','durationMinutes']
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
    return df

## Dave Ramsey Data

In [11]:
channel_id = "UC7eBNeDW1GQf2NJQ6G6gAxw"
# One API request serves both the channel statistics and the uploads playlist id
# (the playlist id used to be fetched with a second, identical request).
DRamsey_channel_stats = requestData(youtube,channel_id)
playlist_id = DRamsey_channel_stats['playListId'].values[0]
# Keep only the calendar date of the channel's creation timestamp.
DRamsey_channel_stats['startDate'] = pd.to_datetime(DRamsey_channel_stats['startDate']).dt.date

In [None]:
# List every video id in the uploads playlist, then fetch per-video details.
DRamsey_video_ids = get_video_ids(youtube=youtube, playlist_id=playlist_id)
DRamsey_video_df = get_video_details(youtube=youtube, video_ids=DRamsey_video_ids)

In [None]:
# Tag every video with the channel id defined in the channel-stats cell
# (reused instead of repeating the literal), then clean the frame.
DRamsey_video_df['channelID'] = channel_id
DRamsey_video_df = clean_data(DRamsey_video_df)

In [None]:
# Reuse the channel id defined above instead of repeating the literal.
DRamsey_channel_stats['channelID'] = channel_id

In [None]:
# Integer date key: render the date as text, drop the dashes, cast to int.
DRamsey_video_df['dateCode'] = (
    DRamsey_video_df['publishedAt']
    .astype('string')
    .replace(to_replace= r'-', value= '', regex=True)
    .astype('int')
)

In [None]:
# clean_data stores publishedAt as plain date objects, so no string format
# applies; the previous format="%Y/%m/%d" did not describe these values.
DRamsey_video_df['publishedAt'] = pd.to_datetime(DRamsey_video_df['publishedAt'])

In [None]:
# Keep only the columns used by the analysis below.
DRamsey_video_df = DRamsey_video_df.loc[
    :,
    ['title', 'publishedAt', 'publishedDay', 'viewCount', 'likeCount', 'dateCode'],
]

In [None]:
# Store view counts as integers.
# NOTE(review): this cast raises if any viewCount is NaN — confirm none are missing.
DRamsey_video_df = DRamsey_video_df.astype({'viewCount': int})

## Stock Data

In [None]:
# Load the stock data; later cells read its 'Date' and 'Close' columns.
stocks_csv_url = 'https://raw.githubusercontent.com/deanymar/DaveRamseyAnalysis/main/stocks.csv'
stocks_df = pd.read_csv(stocks_csv_url)

In [None]:
# Preview the first rows instead of rendering the whole frame.
stocks_df.head()

In [None]:
# Integer date key (YYYYMMDD) derived by parsing the day/month/year dates.
# Parsing (rather than slicing the text) keeps the key correct when day or
# month are not zero-padded, and lets the cell be re-run after 'Date' has
# already been converted to datetime.
stocks_df['dateCode'] = (
    pd.to_datetime(stocks_df['Date'], format='%d/%m/%Y')
    .dt.strftime('%Y%m%d')
    .astype(int)
)
# 'Close' arrives as text with thousands separators; strip them and convert.
# The astype(str) makes the cell re-runnable once the column is already float.
stocks_df['Close'] = (
    stocks_df['Close']
    .astype(str)
    .str.strip()
    .str.replace(",", "", regex=False)
    .astype(float)
)

In [None]:
# Parse the day/month/year strings into datetimes. Without the intermediate
# astype(str), an already-converted column passes through unchanged, so the
# cell can be run more than once.
stocks_df['Date'] = pd.to_datetime(stocks_df['Date'], format='%d/%m/%Y')

## Analysis

In [None]:
# Average the numeric columns per publication date. numeric_only=True skips
# the text columns (title, publishedDay), which recent pandas versions refuse
# to average instead of dropping silently.
total_views_perday = (
    DRamsey_video_df
    .groupby('publishedAt')
    .mean(numeric_only=True)
    .reset_index()
)
# The mean is fractional; truncate it back to whole views.
total_views_perday['viewCount'] = total_views_perday['viewCount'].astype(int)

In [None]:
# Weekday name of each publication date.
total_views_perday = total_views_perday.assign(
    publishedDay=total_views_perday['publishedAt'].dt.day_name()
)

In [None]:
# Integer date key: render the date as text, drop the dashes, cast to int.
total_views_perday['dateCode'] = (
    total_views_perday['publishedAt']
    .astype('string')
    .replace(to_replace= r'-', value= '', regex=True)
    .astype('int')
)

In [None]:
## Merge the per-day video averages with the stock data so that each
## publication date is paired with that day's closing price.
# The join key is named explicitly instead of relying on whichever columns
# the two frames happen to share.
# NOTE(review): assumes 'dateCode' is the only column common to both frames — confirm.
# The sorted result is kept in df; previously it was only displayed.
df = pd.merge(total_views_perday, stocks_df, how='inner', on='dateCode').sort_values('dateCode', ascending=True)
df

In [None]:
# Sum the numeric columns per weekday, ordered by view count.
# numeric_only=True skips the datetime columns, which recent pandas versions
# refuse to sum instead of dropping silently.
df_group = (
    df.groupby('publishedDay')
    .sum(numeric_only=True)
    .reset_index()
    .sort_values('viewCount')
)


In [None]:
# Bar chart of view counts per weekday.
ax = sns.barplot(data=df_group, x="publishedDay", y="viewCount")
ax.set_xlabel('Day')
ax.set_ylabel('Total Views')
# Plain (non-scientific) tick labels on the y axis.
ax.ticklabel_format(style='plain', axis='y')
plt.show()

In [None]:
# Figure size and dark grid theme, set in one call.
sns.set_theme(style="darkgrid", rc={'figure.figsize':(11.7,8.27)})
# View count over time.
sns.lineplot(data=df, x="Date", y="viewCount")

In [None]:
# Closing price over time.
sns.lineplot(data=df, x="Date", y="Close")

In [None]:
# View count by calendar year. The x values are years, so the axis is
# labelled 'Year' rather than inheriting the 'Date' column name.
ax = sns.lineplot(x=df["Date"].dt.year, y="viewCount",
             data=df)
ax.set(xlabel='Year');

In [None]:
# Closing price by calendar year. The x values are years, so the axis is
# labelled 'Year' rather than inheriting the 'Date' column name.
ax = sns.lineplot(x=df["Date"].dt.year, y="Close",
             data=df)
ax.set(xlabel='Year');

In [None]:
# Correlate the numeric columns only; df also holds text and datetime
# columns, which recent pandas versions refuse to correlate.
df_correlation = df.select_dtypes(include='number').corr()
sns.heatmap(df_correlation, annot=True)