In [1]:
import pandas as pd
from googleapiclient.discovery import build


In [2]:
from random import Random, sample

Build the list of Alt-lite channels (by channel id) identified and labeled by Ribeiro et al.

In [3]:
# Load the labeled channel table and preview its first rows.
ain_df = pd.read_csv(filepath_or_buffer='./ribeiro_ain.csv')
ain_df.head(n=5)

Unnamed: 0,Name,Category,Data Collection step,Id
0,DailyCallerVideo,Alt-lite,2,UCfDdlNLRVb1h3_7Xh-WhL3w
1,Liberty Machine News,Alt-lite,2,UCso6oQ3qy7z61Fr8b0WNBMw
2,AIU-Resurrection,Alt-lite,2,UCplXdu0g94UynzYCmO2zOjw
3,Tree Of Logic,Alt-lite,2,UCl3RCEtooHD5bhPCHJw3btA
4,DailyKenn,Alt-lite,2,UCbukZErY9SjdGTHabMzY7qg


In [4]:
# Keep only the rows whose Category label is exactly 'Alt-lite'.
is_alt_lite = ain_df['Category'].eq('Alt-lite')
alt_lite_df = ain_df.loc[is_alt_lite]
alt_lite_df.head()

Unnamed: 0,Name,Category,Data Collection step,Id
0,DailyCallerVideo,Alt-lite,2,UCfDdlNLRVb1h3_7Xh-WhL3w
1,Liberty Machine News,Alt-lite,2,UCso6oQ3qy7z61Fr8b0WNBMw
2,AIU-Resurrection,Alt-lite,2,UCplXdu0g94UynzYCmO2zOjw
3,Tree Of Logic,Alt-lite,2,UCl3RCEtooHD5bhPCHJw3btA
4,DailyKenn,Alt-lite,2,UCbukZErY9SjdGTHabMzY7qg


In [5]:
# From old build_polarizer (see Alex branch of Siqi's Polarizer code)
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'
dev_key_file = '../yt_api_dev_key.txt'
# The developer key lives in a file outside the notebook so it is never
# hard-coded here. `with` closes the file even if the read fails, and strip()
# removes the trailing newline that otherwise travels with the key into every
# request URL (visible as "%0A" right after the key).
with open(dev_key_file, "r") as text_file:
    DEVELOPER_KEY = text_file.read().strip()

In [6]:
def check_api_available(cid):
    """Report whether a channel id still resolves through the YouTube Data API.

    Args:
        cid: YouTube channel id to look up.

    Returns:
        False when the channels lookup reports zero total results for
        ``cid``, True otherwise.
    """
    # A fresh API client is built on every call, using the module-level settings.
    client = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
    response = client.channels().list(
        part="snippet,contentDetails,statistics",
        id=cid
    ).execute()
    match_count = response['pageInfo']['totalResults']
    return match_count != 0


In [9]:
# Number of channel ids in the Alt-lite subset.
len(alt_lite_df['Id'])

113

In [8]:
# Keep only the channel ids that the API can still resolve. The list is grown
# in place, so ids gathered before a failing request remain in it.
alt_lite_ids = []
for channel_id in alt_lite_df['Id']:
    if not check_api_available(channel_id):
        continue
    alt_lite_ids.append(channel_id)

In [21]:
# Single-column frame ('id') holding the surviving channel ids.
d = dict(id=alt_lite_ids)
df = pd.DataFrame(data=d)

In [22]:
# Of the 113 Alt-lite channels labeled by Ribeiro et al. above, how many
# passed the availability check and remain?
len(alt_lite_ids)

94

In [23]:
# Write the ids one per line, with no header row and no index column.
df.to_csv('ribeiro_alt-lite.csv', index=False, header=False)

Grab the 20 most recent short (< 4 min) videos and the 20 most recent medium (4–20 min) videos from each Alt-lite channel. Note: the search call below requests at most 10 results per call, so at most 10 of each are actually collected per channel.

In [24]:
def get_recent_videos(cid, date, length, max_results=10):
    """Return the most recent videos of one channel for one duration bucket.

    Args:
        cid: YouTube channel id to search within.
        date: Currently unused; kept so existing three-argument calls keep
            working. It is no longer overwritten inside the function.
        length: Value passed as the search ``videoDuration`` filter; this
            notebook uses 'short' and 'medium'.
        max_results: Number of results to request from the search call.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A list of dicts with keys 'cid' (the channel id passed in), 'vid'
        (the video id) and 'date' (the item's ``publishedAt`` string), in the
        order the API returned them (the request asks for order="date").

    Raises:
        Whatever ``request.execute()`` raises, e.g. the HttpError seen when
        the API quota is exceeded.
    """
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
    request = youtube.search().list(
        part="snippet,id",
        order="date",
        type='video',
        channelId=cid,
        maxResults=max_results,
        videoDuration=length
    )
    response = request.execute()
    # A response without an 'items' key is treated as "no videos found".
    return [
        {
            'cid': cid,
            'vid': item['id']['videoId'],
            'date': item['snippet']['publishedAt'],
        }
        for item in response.get('items', [])
    ]


In [25]:
# Try the helper on a single hard-coded channel id before looping over the
# whole channel list.
response = get_recent_videos('UC_x5XG1OV2P6uZZ5FSM9Ttw', None, 'short')
print(response)

[{'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': '6hdMspVzslY', 'date': '2022-05-09T15:00:08Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'WPzM7JjuAlk', 'date': '2022-05-06T21:00:12Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'RrzsBq4fMBM', 'date': '2022-05-06T16:00:13Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'gK2u2FC6eS0', 'date': '2022-05-05T21:00:14Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': '0Pt0Ko7g_t4', 'date': '2022-04-27T21:00:12Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'm0NPlatW0ro', 'date': '2022-04-22T21:00:18Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'rS6Y8xiBgfw', 'date': '2022-04-19T21:00:32Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'DNvFv-sNCfU', 'date': '2022-04-14T21:00:10Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'q9O8uD0X4Qg', 'date': '2022-04-13T23:00:15Z'}, {'cid': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'vid': 'cB_-Pf2tvqM', 'date': '2022-04-12T16:00:18Z'}]


In [26]:
# NOTE: this rebinds `alt_lite_df`. From here on it is the header-less id file
# (header=None, so pandas assigns integer column labels), not the filtered
# Ribeiro table.
alt_lite_df = pd.read_csv('./ribeiro_alt-lite.csv', header=None)
alt_lite_df.head()

Unnamed: 0,0
0,UCfDdlNLRVb1h3_7Xh-WhL3w
1,UCso6oQ3qy7z61Fr8b0WNBMw
2,UCl3RCEtooHD5bhPCHJw3btA
3,UC7tlcXIInkPG5aCkWB9lZOQ
4,UCSFVD7Xfhn7sJY8LAIQmH8Q


In [27]:
# Column 0 of the header-less id file holds the channel ids.
cids = alt_lite_df[0]

In [28]:
# Collect the recent short and medium videos of every channel. Each request's
# result list is stored as its own element, so `vids` is a list of lists.
vids = []
for cid in cids:
    print(cid)
    # Both lookups run (short first, then medium) before anything is stored,
    # so a failing request adds nothing for this channel.
    response_short, response_medium = (
        get_recent_videos(cid, None, length) for length in ('short', 'medium')
    )
    vids += [response_short, response_medium]

UCfDdlNLRVb1h3_7Xh-WhL3w
UCso6oQ3qy7z61Fr8b0WNBMw




UCl3RCEtooHD5bhPCHJw3btA


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet%2Cid&order=date&type=video&channelId=UCl3RCEtooHD5bhPCHJw3btA&maxResults=10&videoDuration=short&key=[REDACTED]%0A&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [68]:
# `vids` was built as a 2-d list (one inner list per request) rather than a
# 1-d list of records; flatten it, keeping the original order.
new_vids = [vid for vid_list in vids for vid in vid_list]

In [69]:
# Display the flattened list of video records.
new_vids

[{'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'zlQMwWkZNp8',
  'date': '2018-06-27T09:50:11Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': '7r9W-qadhVQ',
  'date': '2018-03-01T06:25:17Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'P_xubHEaR6w',
  'date': '2017-12-20T03:46:07Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'w3zFUZejKxM',
  'date': '2017-12-11T16:30:46Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'AnUPJEs8aWc',
  'date': '2017-09-23T21:28:47Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'L5A2wUcndjo',
  'date': '2017-08-08T05:27:35Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': '1XQ9_oNrl1E',
  'date': '2017-07-31T23:17:40Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': '6kJZeyL-jKI',
  'date': '2017-07-31T23:17:50Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'F_FItO0Gxrw',
  'date': '2017-06-19T20:36:47Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  'vid': 'vtzidOkBcRE',
  'date': '2017-06-03T16:28:32Z'},
 {'cid': 'UCSTy-H5lSiCcozas32sfJlQ',
  '

In [70]:
# One row per video record; the dict keys become the columns.
alt_lite_vids_df = pd.DataFrame(data=new_vids)
alt_lite_vids_df.head(n=5)

Unnamed: 0,cid,vid,date
0,UCSTy-H5lSiCcozas32sfJlQ,zlQMwWkZNp8,2018-06-27T09:50:11Z
1,UCSTy-H5lSiCcozas32sfJlQ,7r9W-qadhVQ,2018-03-01T06:25:17Z
2,UCSTy-H5lSiCcozas32sfJlQ,P_xubHEaR6w,2017-12-20T03:46:07Z
3,UCSTy-H5lSiCcozas32sfJlQ,w3zFUZejKxM,2017-12-11T16:30:46Z
4,UCSTy-H5lSiCcozas32sfJlQ,AnUPJEs8aWc,2017-09-23T21:28:47Z


In [71]:
# Persist the video records to CSV without the index column.
alt_lite_vids_df.to_csv('./alt_lite_vids.csv', index=False)

Sample 40 videos from the collected Alt-lite videos

In [72]:
# Plain Python list of the video ids, in frame order.
alt_lite_vids = alt_lite_vids_df['vid'].tolist()

In [73]:
# Draw 40 distinct video ids without replacement. A dedicated, seeded
# generator makes the draw identical on every Restart & Run All; the unseeded
# module-level sample() used before produced a different selection per run.
# NOTE: the seeded draw will not reproduce an earlier unseeded selection.
SAMPLE_SEED = 0
sampled_vids = Random(SAMPLE_SEED).sample(alt_lite_vids, 40)
print(sampled_vids)

['u8GMYIs4cRI', 'Cphq4DZfFLw', 'R1dtDMyOTDc', 'ariHTB-qdpc', 'jBZ1Y3t-k4o', 'snFkGHpbX38', '5eEhLlRQFdE', '-zuhNFgimyU', 'lCR6hTW9Clw', 'kIOzBlphHBs', 'b7mFJUOzpdU', 'oyhCzCd3Ouo', '9uKMK3CnvEE', 'kuARFR_lyjA', '9xVX8vdL9-I', '8BzGHkWyVkg', 'csPMnw701MQ', 'kViCGMMdRFM', 'Rr8QaMlSLjE', 'vtzidOkBcRE', 'T7ZJMS5AUNE', 'kaLrOH1Y_y0', 'DIL5JB_F6d4', '2K1JzBZmbug', 'aFiS90_Dywk', 'VEBipOj2rkU', 'vm6MzvC-8pU', '3BFmEv_5mKU', 'xTVYLeMzTgQ', '8GvTA767gHU', 'iyboBvfQmkI', 'AVWTbcjaL7o', 'CWEMXCHPdvw', 'QkKdriM0aTg', 'MhA1u0UtpSc', 'XB4ptBXroTw', 'UcGWitwKEKc', '1Ua2XnYlm-o', '_KKOLEtMarE', '9YGiXHNOFzM']
