# FL Studio Tuturials on Youtube

## Getting started

In [1]:
# To run this code, the pip package "google-api-python-client" is required
# If not installed, please uncomment and run the following line

# pip install google-api-python-client

#### Once installed, restart and clean the kernel and continue from here:

In [2]:
from googleapiclient.discovery import build
import csv
import time
import os
import json
import pandas as pd

#### Create an api_key and store this credential as a Environment Variable on your local device
In this project, the variable is defined in Terminal as  "YOUTUBE_API". For more information, check [this page](https://tilburgsciencehub.com/building-blocks/store-and-document-your-data/store-data/environment-variables/) or [this instructional video](https://www.youtube.com/watch?v=5iWhQWVXosU).

In [3]:
api_key = os.environ["YOUTUBE_API"]
api_key = 'AIzaSyB770EnyGnPvNtdIgvqVyZIsnBeinWuyzQ'

In [4]:
youtube = build('youtube', 'v3', developerKey= api_key)

#### Testing the systems
The cells below will check whether the API functions. 

Within this document, each definition of a parameter, operation or function is explained within the cells. These definitions should be considered as the same throughout the project unless mentioned otherwise.

In [5]:
# check the default number of results, this should give 5

# .search = executes the search method 
# .list = retrieves a list of zero or more resources
# q = query term
# part = identifies group of properties that should be returned
# type = type of resource
# snippet = provides overview with information about the video such as titles, description, thumbnails and tags
# .execute = executes the request

request = youtube.search().list(
            q='FL tutorial',
            part='snippet',
            type='video')
response = request.execute()
print('Total items: ' , len(response['items']))

Total items:  5


In [6]:
# check the maximum number of results, this should give 50

# maxResults = specifies the number of items that should be returned with a maximum of 50

request = youtube.search().list(q='FL tutorial',part='snippet',type='video',maxResults=50)
response = request.execute()
print('Total items: ' , len(response['items']))

Total items:  50


If the two cells above gave you the outputs **5** and **50** respectfully, the API works correctly.

#### Gathering the data

In [7]:
# creates basic for data collection

# 
#
#
# next_page = every page contains a nextPageToken and this is used to iterate over multiple pages


# no_requests = 0
# max_requests = 100




def retreive_data(no_requests, max_requests):
    search_res = []
    
    while no_requests <= max_requests:
        try:
            no_requests += 1
            # if no have requests have been executed, then go find the first result
            # else get the result from the next page
            if (no_requests==1): 
                request = youtube.search().list(q='FL tutorial',part='snippet',type='video',maxResults=50)
            else:
                request = youtube.search().list(q='FL tutorial',part='snippet',type='video',maxResults=50, pageToken = next_page)

            # capture response and set next page
            response = request.execute()
            next_page = response['nextPageToken']
        except:    
            # if no next page is found, then stop the script
            break

        for item in response['items']:
            search_res.append(item['snippet'])
        time.sleep(2)
        
    return search_res

In [8]:
# makes a list of videoIDs

videoIDs = []
search_res = retreive_data(no_requests=0, max_requests=100)

for item in search_res:
    videoIDs.append(item['thumbnails']['default']['url'][23:34])

print("Found " + str(len(videoIDs)) + " video IDs!")

Found 600 video IDs!


In [9]:
# --- start of gathering statistics --- #

In [10]:
# response for stats 
response_stats = []

for vid in videoIDs:
    stats = youtube.videos().list(part='statistics',id=vid)
    response_stats.append(stats.execute())
    res_stats = {}
    
    for item in response_stats:
        stats = item['items'][0]['statistics']
        res_stats[item['items'][0]['id']] = stats

In [11]:
# output stats to json file
import json
converted_to_string = json.dumps(res_stats)
f = open('stats_output.json', 'w', encoding='utf-8')
f.write(converted_to_string + '\n')
f.close()

In [12]:
# read json file for stats
f = open('stats_output.json', 'r', encoding='utf-8')
content = f.readlines()
for item in content:
    jsonobj = json.loads(item)
    f.close()

In [13]:
# write df for stats and convert to csv
dfstats = pd.read_json(r'stats_output.json', orient = 'index')

In [14]:
# drops favoriteCount column, this is an old feature that does not longer exist on YouTube

dfstats.drop(dfstats.columns[3],axis=1,inplace=True)

In [15]:
# Clean stats columns by removing NaNs and converting to int

cols_stats = ['viewCount', 'likeCount', 'dislikeCount', 'commentCount']

dfstats[cols_stats] = dfstats[cols_stats].fillna(0)
dfstats[cols_stats] = dfstats[cols_stats].astype(int)

In [16]:
# creates ratio for likes vs dislikes and comments vs viewcount in percentages

dfstats['likeRatio %'] = (dfstats['likeCount']/(dfstats['likeCount'] + dfstats['dislikeCount']))*100
dfstats['likeRatio %'] = dfstats['likeRatio %'].apply(lambda x: '%.1f' % x)

dfstats['commentRatio %'] = (dfstats['commentCount']/dfstats['viewCount'])*100
dfstats['commentRatio %'] = dfstats['commentRatio %'].apply(lambda x: '%.4f' % x)

In [17]:
dfstats.to_csv('video_statistics.csv', index_label ='id')

In [19]:
# --- start of gathering snippets --- #
# response for snippet
response_snippets = []

for item in videoIDs:
    snippets = youtube.videos().list(part='snippet',id=item)
    response_snippets.append(snippets.execute())
    res_snippets = {}
    
    for item in response_snippets:
        snippets = item['items'][0]['snippet'] 
        res_snippets[item['items'][0]['id']] = snippets

In [20]:
# output snippets to json file

converted_to_string = json.dumps(res_snippets)
f = open('snippet_output.json', 'w', encoding='utf-8')
f.write(converted_to_string + '\n')
f.close()

In [21]:
# read json file for snippet
f = open('snippet_output.json', 'r', encoding='utf-8')
content = f.readlines()
for item in content:
    jsonobj = json.loads(item)
    f.close()

In [22]:
# write df for snippet to csv
dfsnip = pd.read_json(r'snippet_output.json', orient = 'index')

In [23]:
# drops column liveBroadcastContent which contains only the value "none"

dfsnip.drop(dfsnip.columns[8],axis=1,inplace=True)
dfsnip

Unnamed: 0,publishedAt,channelId,title,description,thumbnails,channelTitle,tags,categoryId,localized,defaultAudioLanguage,defaultLanguage
pDIsEZsalAo,2018-05-23T15:33:40Z,UCIcCXe3iWo6lq-iWKV40Oug,FL Studio 20 - Complete Beginner Basics Tuto...,This video is the first in the FL Studio 20 Ba...,{'default': {'url': 'https://i.ytimg.com/vi/pD...,In The Mix,"[FL Studio 20, FL Studio 20 begginer tutorial,...",26,{'title': 'FL Studio 20 - Complete Beginner ...,en-GB,
Lc4SHa-O4T0,2020-05-28T18:00:09Z,UC4Oy0UtQpTBGX46q_OkWzXg,FL STUDIO 20 BEGINNER BEAT MAKING TUTORIAL 202...,FL STUDIO 20 BEGINNER BEAT MAKING TUTORIAL 202...,{'default': {'url': 'https://i.ytimg.com/vi/Lc...,Chuki Beats II,"[making a beat in fl studio, making a beat, ma...",26,{'title': 'FL STUDIO 20 BEGINNER BEAT MAKING T...,en,en
x5-GweOYXtk,2020-01-29T09:18:58Z,UCIUtwq6jMFYQj2ASOj95TGw,FL Studio 20 Tutorial | Full FL Studio Crash C...,âœ…Click Below to get our full Complete FL Studi...,{'default': {'url': 'https://i.ytimg.com/vi/x5...,Tomas George,,27,{'title': 'FL Studio 20 Tutorial | Full FL Stu...,en-GB,
jYBjcEVTSB8,2020-12-25T16:21:45Z,UC4Oy0UtQpTBGX46q_OkWzXg,FL STUDIO BEAT MAKING TUTORIAL FOR BEGINNERS 2...,"'Secret Sauce' , Episode 1 - How to make beats...",{'default': {'url': 'https://i.ytimg.com/vi/jY...,Chuki Beats II,"[making a beat in fl studio, making a beat, ma...",26,{'title': 'FL STUDIO BEAT MAKING TUTORIAL FOR ...,en,en
elWE2jqjwVc,2021-06-16T15:14:19Z,UCsgaFTYm3fvDnkPWb-FIa_w,FL STUDIO 20 TUTORIAL | Complete Beginners Tut...,ðŸ”¥ BEST BLACK FRIDAY VST DEALS / FREEBIES 2021 ...,{'default': {'url': 'https://i.ytimg.com/vi/el...,Producer Sphere,"[fl studio 20 tutorial, fl studio tutorial 202...",10,{'title': 'FL STUDIO 20 TUTORIAL | Complete Be...,en-US,
...,...,...,...,...,...,...,...,...,...,...,...
4Tk1zs0Bs-k,2021-11-24T04:15:44Z,UC5-tX1b_iJtn8yGoPCvkiSw,Cara membuat musik di android untuk pemula..!!...,#caramembuatmusikdihp #flstudiomobiletutorial ...,{'default': {'url': 'https://i.ytimg.com/vi/4T...,AR REMIX,,10,{'title': 'Cara membuat musik di android untuk...,id,
qjArVukPOQo,2021-11-25T00:37:52Z,UCJ0sIAYr-4oy6U8N3CGCKSg,Buju - Testimony - Remake/Tutorial,[Checkout My Soundkits]\n\nRemake Pack vol1\nh...,{'default': {'url': 'https://i.ytimg.com/vi/qj...,POJbeatz,,10,{'title': 'Buju - Testimony - Remake/Tutorial'...,,
Xi432wVgM8A,2021-11-25T19:37:43Z,UCoPeLbeRgCY-mOKCsJKf5Hg,Industrial Hardcore Kick FL Studio 20 + Freque...,My webstore: https://www.hcontunes.com/store \...,{'default': {'url': 'https://i.ytimg.com/vi/Xi...,HCONTUNES,"[industrial hardcore kick, industrial hardcore...",10,{'title': 'Industrial Hardcore Kick FL Studio ...,,
tzyZQX8QT-s,2021-11-25T22:44:53Z,UCbTXllgA37QXhio_jyGz5Pg,Afro House From Scratch on FL Studio 20,For mixing and mastering and ghost production:...,{'default': {'url': 'https://i.ytimg.com/vi/tz...,EyeRonik Productions,"[FL Studio, South African Deep House, How To M...",10,{'title': 'Afro House From Scratch on FL Studi...,,


In [24]:
dfsnip.to_csv('video_snippets.csv', index_label ='id')

In [25]:
#--- creation of Channel ID list ---#
ChannelIDs = []

for item in search_res:
    ChannelIDs.append(item['channelId'])

In [26]:
# Response channel

response_channel = []
res_channels = {}
for chn in ChannelIDs:
    channel = youtube.channels().list(part='statistics',id=chn)
    response_channel.append(channel.execute())
    
    for chn in response_channel:
        channelstat = chn['items'][0]['statistics']
        res_channels[chn['items'][0]['id']] = channelstat

In [27]:
# output channel to json file

converted_to_string = json.dumps(res_channels)
f = open('channels_output.json', 'w', encoding='utf-8')
f.write(converted_to_string + '\n')
f.close()

In [28]:
# read json file for channel
f = open('channels_output.json', 'r', encoding='utf-8')
content = f.readlines()
for item in content:
    jsonobj = json.loads(item)
    f.close()

In [29]:
# write df for channel to csv
dfchn = pd.read_json(r'channels_output.json', orient = 'index')
dfchn.to_csv('video_channels.csv', index_label ='channelId')

In [30]:
#--- merging the video snippets and statistics --- #
dfsnip = pd.read_csv('video_snippets.csv')
dfstats = pd.read_csv('video_statistics.csv')

dfmerged = dfsnip.merge(dfstats, on='id')
dfmerged.to_csv('video_output.csv')