## Implementing Chris Doenlen's 'Bot Or Not' Python Module

Everything from `twitter_funcs.py` was cloned from Chris' [repository](https://github.com/scrapfishies/twitter-bot-detection).

I will use this to label each user as 'bot' (boolean 1/0).

In [1]:
import pandas as pd
import numpy as np
from copy import deepcopy
from twitter_funcs import *

# Imports
import os
import numpy as np
import pandas as pd

import pickle
import tweepy

from datetime import datetime
from secrets import api_secret_key, api_key, bearer_token
import re
import time
import csv

In [2]:
# Load the pickled tweets DataFrame (a balanced Nov 2 sample, judging by the file name).
data = pd.read_pickle("pickle/balanced_nov2_tweets.pick")

In [3]:
data.shape

(144000, 9)

In [4]:
data.head(3)

Unnamed: 0,id,date,time,user_id,username,tweet,hashtags,trump,biden
181142,1323379284434669568,2020-11-02,21:39:43,2820503362,artistacriseida,All these articles showing that Biden is in th...,[],False,True
0,1323414585995526144,2020-11-02,23:59:59,1312487180258820096,annapieters17,@FoxNews Lady Gaga’s a nobody. Can’t figure ou...,[],False,True
4,1323414585232293888,2020-11-02,23:59:59,2335763630,kylechwatt,@The_Grupp “It is purely a fortuity that this ...,[],False,True


I'm getting a rate-limit error. According to Twitter's documentation, I can look up 300 users per 15 minutes. Let's try that.

In [6]:
def chunks(user_ids, n):
    """Lazily split ``user_ids`` into consecutive lists of at most ``n`` items.

    The iterable is materialised into a list first, so generators and other
    one-shot iterables are accepted. The last chunk may be shorter than ``n``.
    """
    items = list(user_ids)
    starts = range(0, len(items), n)
    yield from (items[start:start + n] for start in starts)
    


Here's where we'll implement Chris Doenlen's 'Bot or Not' model.

In [7]:
# Load the pickled bot-detection model used below to score each account.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load a bot_model.pick obtained from a trusted source.
with open("bot_model.pick", "rb") as read_file:
    xgb_model = pickle.load(read_file)

In [8]:
# Pause for 5 minutes before starting the lookups — presumably to let the
# API rate-limit window recover after earlier attempts (assumption; confirm).
time.sleep(60*5)

In [None]:
# Not populated in this cell; kept in case later cells reference them.
verifieds = {}
bot_probas = {}

# Single source of truth for the results file (read for resume, appended to below).
USER_STATS_PATH = "../data/user_stats.csv"


def _account_features(user):
    """Build the model's feature list from a Twitter user object.

    ``user`` must expose ``created_at``, ``verified``, ``geo_enabled``,
    ``default_profile``, ``default_profile_image``, ``favourites_count``,
    ``followers_count``, ``friends_count`` and ``statuses_count``.

    Returns a 14-element list whose first element is ``verified``. The
    element order is unchanged from the original cell (presumably the
    order the pickled model was trained with — verify before reordering).

    Raises ZeroDivisionError for accounts less than one day old; the
    caller records such users as NaN.
    """
    created_at = user.created_at
    # With a naive ``created_at`` (tzinfo None) this is exactly
    # ``datetime.now()``; passing the tzinfo through avoids a TypeError if
    # the client library hands back a timezone-aware timestamp.
    account_age_days = (datetime.now(created_at.tzinfo) - created_at).days

    # account features
    verified = user.verified  # also stored in the output CSV
    geo_enabled = user.geo_enabled
    default_profile = user.default_profile
    default_profile_image = user.default_profile_image
    favourites_count = user.favourites_count
    followers_count = user.followers_count
    friends_count = user.friends_count
    statuses_count = user.statuses_count
    average_tweets_per_day = np.round(statuses_count / account_age_days, 3)

    # manufactured features
    hour_created = int(created_at.strftime("%H"))
    network = np.round(np.log(1 + friends_count) * np.log(1 + followers_count), 3)
    tweet_to_followers = np.round(
        np.log(1 + statuses_count) * np.log(1 + followers_count), 3
    )
    follower_acq_rate = np.round(
        np.log(1 + (followers_count / account_age_days)), 3
    )
    friends_acq_rate = np.round(np.log(1 + (friends_count / account_age_days)), 3)

    return [
        verified, hour_created, geo_enabled, default_profile,
        default_profile_image, favourites_count, followers_count,
        friends_count, statuses_count, average_tweets_per_day, network,
        tweet_to_followers, follower_acq_rate, friends_acq_rate,
    ]


# Don't repeat users we already have if a previous run of the script failed.
user_ids = list(data.user_id.unique())
try:
    # Read ids as strings: this keeps 19-digit ids exact and tolerates
    # repeated header rows left in the file by earlier runs.
    existing_stats = pd.read_csv(USER_STATS_PATH, usecols=["user_id"], dtype=str)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # First run (or empty file): nothing to skip.
    existing_ids = set()
else:
    existing_ids = set(existing_stats["user_id"].dropna())
    existing_ids.discard("user_id")  # stray header rows from earlier appends
    user_ids = [uid for uid in user_ids if str(uid) not in existing_ids]
    print(len(existing_ids))

user_id_chunks = list(chunks(user_ids, n=300))

# Now get stats for new users.

# The API client does not depend on the user, so build it once.
auth = tweepy.OAuthHandler(api_key, api_secret_key)
api = tweepy.API(auth)

# Only write the header when starting a fresh file; the file is opened in
# append mode so that reruns extend the earlier results.
needs_header = (
    not os.path.exists(USER_STATS_PATH) or os.path.getsize(USER_STATS_PATH) == 0
)

# ``with`` guarantees the file is flushed and closed even if the loop is
# interrupted; ``newline=""`` is what the csv module expects for its files.
with open(USER_STATS_PATH, "a", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)
    if needs_header:
        csv_writer.writerow(["user_id", "bot_proba", "verified"])

    for chunk in user_id_chunks:
        print(f"Preparing chunk. Num users: {len(chunk)}")
        for user_id in chunk:
            print(f"Preparing user '{user_id}'")

            try:
                # Look the account up by numeric id and score it.
                user = api.get_user(user_id=user_id)
                account_features = _account_features(user)
                user_m = np.array([account_features])  # shape (1, 14)
                proba = np.round(xgb_model.predict_proba(user_m)[:, 1][0] * 100, 2)
                verified = account_features[0]
            except Exception as err:
                # Best effort: record the user as NaN so the run continues
                # (and the user is not retried on resume). ``Exception``
                # rather than a bare ``except`` so Ctrl-C still stops the run.
                print(f'error encountered, skipping user {user_id}')
                print(f"  reason: {err!r}")
                proba = np.nan
                verified = np.nan

            csv_writer.writerow([user_id, proba, verified])

        # Persist the finished chunk before the long wait.
        csv_file.flush()
        print("Chunk complete. Waiting 15 minutes.")
        time.sleep(15*60+1)

Preparing chunk. Num users: 300
Preparing user '2820503362'
Preparing user '1312487180258820096'
Preparing user '2335763630'
Preparing user '3282973915'
Preparing user '105937456'
Preparing user '46538055'
Preparing user '100625142'
Preparing user '1219326209366155264'
Preparing user '1254928044529274883'
Preparing user '1288981025364860928'
Preparing user '65067002'
Preparing user '2562614485'
Preparing user '2246207654'
Preparing user '4261171756'
Preparing user '1066144442241953792'
Preparing user '42920728'
Preparing user '35221554'
Preparing user '1007960516'
Preparing user '1118489465008336897'
Preparing user '980467278385958912'
Preparing user '59143598'
Preparing user '1206954619789856769'
Preparing user '1243870145229008896'
Preparing user '1658455681'
Preparing user '989298900'
Preparing user '1267800950321876994'
Preparing user '908298889828106244'
Preparing user '807989050972000256'
Preparing user '849410927158054916'
Preparing user '1090811471985065984'
Preparing user '838

Preparing user '1322777576985735169'
Preparing user '773606728399282176'
Preparing user '44489923'
Preparing user '1291683999392436225'
Preparing user '789191924653498370'
Preparing user '979870450577686528'
Preparing user '1087596246'
Preparing user '1266580535201140736'
Preparing user '1558916702'
Preparing user '45435129'
Preparing user '1216524009728364544'
Preparing user '747223063029227521'
Preparing user '960993253863645184'
Preparing user '3411004725'
Preparing user '35604515'
Preparing user '29460341'
Preparing user '735431735148699649'
Preparing user '2457199196'
Preparing user '591354042'
Preparing user '1068676218516635649'
Preparing user '64909225'
Preparing user '1314028401980846082'
Preparing user '1682291196'
Preparing user '1033807310391394304'
Preparing user '889182369478959105'
Preparing user '2777388739'
Preparing user '1074950605611790336'
Preparing user '1235927786533138433'
Preparing user '930407064'
Preparing user '2708307975'
Preparing user '1299053433257828352

Preparing user '797466618049822722'
Preparing user '1275524470904668160'
Preparing user '337175830'
Preparing user '1090329114827878400'
Preparing user '1023791437417275394'
Preparing user '1122061632224550912'
Preparing user '1220490158656671745'
Preparing user '2161005588'
Preparing user '3911961377'
Preparing user '3124442283'
Preparing user '359512877'
Preparing user '1267950398872133639'
Preparing user '218148636'
Preparing user '24320587'
Preparing user '859109706278858753'
Preparing user '203718874'
Preparing user '17742810'
Preparing user '20442002'
Preparing user '1292619538970939398'
Preparing user '1321966961501315073'
Preparing user '1200973094883676160'
Preparing user '1110239603196256256'
Preparing user '1055551324287369216'
Preparing user '916075489789345792'
Preparing user '1284866944806596608'
Preparing user '1046176148152090624'
Preparing user '19753011'
Preparing user '3073232673'
Preparing user '1320724518155898880'
Preparing user '104334135'
Preparing user '2996493

Preparing user '2617596283'
Preparing user '51776536'
Preparing user '25790437'
Preparing user '315474153'
Preparing user '2475407894'
Preparing user '1254895741690163200'
Preparing user '415142756'
Preparing user '1335059035'
Preparing user '2259260936'
Preparing user '303525188'
Preparing user '978307014605639680'
Preparing user '1270148669371609088'
Preparing user '1460958716'
Preparing user '1548506839'
Preparing user '1076256681334632448'
Preparing user '704481484858658816'
Preparing user '1323229114111844353'
Preparing user '709103640062066688'
Preparing user '1300177071071690753'
Preparing user '1308162986998026240'
Preparing user '4319693115'
Preparing user '2291120303'
Preparing user '863987849921794048'
Preparing user '3991537240'
Preparing user '139596582'
Preparing user '2391934999'
Preparing user '3122854931'
Preparing user '1262830448176234497'
Preparing user '1275811200157257731'
Preparing user '1267828821945794561'
Preparing user '30500949'
Preparing user '1296152764280

Preparing user '223938677'
Preparing user '1259224851518115841'
Preparing user '1211344769877561344'
Preparing user '22327009'
Preparing user '305148929'
Preparing user '1296932965134499840'
Preparing user '2844609570'
Preparing user '700780827928432640'
Preparing user '3378267019'
Preparing user '1912807076'
Preparing user '402965564'
Preparing user '1282129799780036610'
Preparing user '608525749'
Preparing user '960188062348251136'
Preparing user '384376401'
Preparing user '21099562'
Preparing user '1300031675821436928'
Preparing user '1234885878255955968'
Preparing user '1015025453571158016'
Preparing user '1175592411604500480'
Preparing user '1285631360527249408'
Preparing user '1321786666646908930'
Preparing user '279224297'
Preparing user '1309540147713712129'
Preparing user '1104322079585984513'
Preparing user '793473581061550080'
Preparing user '1142068257052139521'
Preparing user '741803'
Preparing user '1021815473116078080'
Preparing user '1305234560909623296'
Preparing user 

Preparing user '1231797674204127232'
Preparing user '1213179368433782784'
Preparing user '1695847002'
Preparing user '871547963138428929'
Preparing user '3821082372'
Preparing user '1483349792'
Preparing user '757180567'
Preparing user '541407054'
Preparing user '1313846792774184960'
Preparing user '1281334262583775233'
Preparing user '1181499662374752256'
Preparing user '2150598933'
Preparing user '1129078644184354819'
Preparing user '1926507704'
Preparing user '27551230'
Preparing user '2667652819'
Preparing user '1255472313828589568'
Preparing user '1274407269283717120'
Preparing user '1311404799879974912'
Preparing user '630598640'
Preparing user '32791122'
Preparing user '2249049186'
Preparing user '238809201'
Preparing user '1159029813136515074'
Preparing user '2853625752'
Preparing user '859413739308802049'
Preparing user '752360551615860736'
Preparing user '2310947501'
Preparing user '212965211'
Preparing user '1257270645634850817'
Preparing user '900744450'
Preparing user '240

Preparing user '191087530'
Preparing user '1010567554778054659'
Preparing user '1957908608'
Preparing user '824290723499384833'
Preparing user '938146845859155968'
Preparing user '1052973267055710208'
Preparing user '305359372'
Preparing user '560146730'
Preparing user '74165881'
Preparing user '64420065'
Preparing user '21119270'
Preparing user '1296483122071121927'
Preparing user '834030626185572353'
Preparing user '866749606754910208'
Preparing user '1304079879818956800'
Preparing user '1160562360949649413'
Preparing user '1315050522399903746'
Preparing user '2809071719'
Preparing user '1313850516208840704'
Preparing user '924776360'
Preparing user '970894987264217088'
Preparing user '3503381716'
Preparing user '1072572644036358146'
Preparing user '1299863840432160769'
Preparing user '10221'
Preparing user '1140049316184317953'
Preparing user '1197284111007830016'
Preparing user '43429285'
Preparing user '936810972551024640'
Preparing user '194681383'
Preparing user '371666872'
Prep

Preparing user '1026872456'
Preparing user '38120230'
Preparing user '269314519'
Preparing user '1018088864'
Preparing user '160194192'
Preparing user '1274076657251278848'
Preparing user '322237905'
Preparing user '832441396229312512'
Preparing user '2287103726'
Preparing user '865355752558481408'
Preparing user '804519904447791104'
Preparing user '1281254068275511298'
Preparing user '134926789'
Preparing user '749581207'
Preparing user '222549201'
Preparing user '1000757636453761025'
Preparing user '1186282148401205248'
Preparing user '963798929484800001'
Preparing user '19408150'
Preparing user '1286967450'
Preparing user '214561638'
Preparing user '603867006'
Preparing user '3301793276'
Preparing user '4839872717'
Preparing user '307168354'
Preparing user '1098955571636719616'
Preparing user '1320235267270316037'
Preparing user '940374803566825478'
Preparing user '132064200'
Preparing user '1274745968261562368'
Preparing user '1168319818648494080'
Preparing user '120121750441424076

Preparing user '37101031'
Preparing user '710262827701895168'
Preparing user '2188509426'
Preparing user '714107258112610304'
Preparing user '74190794'
Preparing user '2890412784'
Preparing user '937785858505113601'
Preparing user '713171548660699139'
Preparing user '2281200498'
Preparing user '333859850'
Preparing user '494534062'
Preparing user '32378319'
Preparing user '780142772485558272'
Preparing user '10450692'
Preparing user '1954119552'
Preparing user '1155415008'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '3342948719'
Preparing user '1425137954'
Preparing user '2342679240'
Preparing user '18704356'
Preparing user '1204260235424260096'
Preparing user '41804973'
Preparing user '343604563'
Preparing user '2808286177'
Preparing user '1011762644871798786'
Preparing user '1251295999684984837'
Preparing user '561742581'
Preparing user '1614277398'
Preparing user '1261824854'
Preparing user '1306297489344036867'
Preparing user '1218204050338066

Preparing user '17059190'
Preparing user '333170394'
Preparing user '743141019038822400'
Preparing user '712402199914090496'
Preparing user '782384588106629120'
Preparing user '633402808'
Preparing user '1312394734153928705'
Preparing user '3275086105'
Preparing user '780598851782578176'
Preparing user '134884062'
Preparing user '252650326'
Preparing user '278011051'
Preparing user '938230728084611072'
Preparing user '501583288'
Preparing user '1003623831054049281'
Preparing user '974890999'
Preparing user '1096192890592788480'
Preparing user '2929428898'
Preparing user '610045909'
Preparing user '858851185'
Preparing user '822793883142721536'
Preparing user '1656298045'
Preparing user '750181981'
Preparing user '1205526166662262786'
Preparing user '2799033651'
Preparing user '1251384846091341829'
Preparing user '60677928'
Preparing user '945813375568764928'
Preparing user '78359025'
Preparing user '2481164516'
Preparing user '819918372829798400'
Preparing user '948676652434935808'
Pre

Preparing user '832488278821478400'
Preparing user '40865987'
Preparing user '56695649'
Preparing user '1300099944087519238'
Preparing user '1310243806969843714'
Preparing user '906652820'
Preparing user '2827118551'
Preparing user '3126808471'
Preparing user '1172158583108186112'
Preparing user '1154983870430896128'
Preparing user '1191718491478315008'
Preparing user '2893330179'
Preparing user '1049039730682355712'
Preparing user '2918103476'
Preparing user '64327804'
Preparing user '1038472670663376897'
Preparing user '101193997'
Preparing user '1033096409837256705'
Preparing user '796058617'
Preparing user '827744659128606720'
Preparing user '19242900'
Preparing user '1002645771563487233'
Preparing user '1199777966445793281'
Preparing user '1314003326007218178'
Preparing user '220987355'
Preparing user '50800093'
Preparing user '1287899781193506822'
Preparing user '125853084'
Preparing user '134360121'
Preparing user '440177372'
Preparing user '4922754109'
Preparing user '401348642

Preparing user '1287213617679499264'
Preparing user '3199991231'
Preparing user '18786007'
Preparing user '881937983573643264'
Preparing user '1242272648450564096'
Preparing user '1084968143011962880'
Preparing user '1374554731'
Preparing user '3435780177'
Preparing user '1494105786'
Preparing user '735516540'
Preparing user '844197745539891204'
Preparing user '876123950912339968'
Preparing user '1252202956222599173'
Preparing user '1251637724295159810'
Preparing user '721292300'
Preparing user '68851677'
Preparing user '1855485667'
Preparing user '817361963965628418'
Preparing user '1311552094638202885'
Preparing user '2795342280'
Preparing user '516645637'
Preparing user '21668549'
Preparing user '791267206965239809'
Preparing user '2518121347'
Preparing user '91828156'
Preparing user '311754622'
Preparing user '1008728578861568000'
Preparing user '2698777905'
Preparing user '806308652860653568'
Preparing user '93949477'
Preparing user '246043067'
Preparing user '1171548764219133958'

Preparing user '119093'
Preparing user '74808975'
Preparing user '1259693559319392256'
Preparing user '1274006401065549824'
Preparing user '836808051579584512'
Preparing user '1000697006'
Preparing user '1258529133983924225'
Preparing user '797297540957872128'
Preparing user '18180382'
Preparing user '1226622638707138566'
Preparing user '1304576005617659905'
Preparing user '1145524573'
Preparing user '1044577164669190144'
Preparing user '3607022476'
Preparing user '1559207672'
Preparing user '1186502711274962944'
Preparing user '1720840812'
Preparing user '357123035'
Preparing user '235290298'
Preparing user '112631234'
Preparing user '344812667'
Preparing user '957328323063025665'
Preparing user '1101630570533130240'
Preparing user '1201553579309273088'
Preparing user '950493462914199552'
Preparing user '1011401371847942144'
Preparing user '1302336487804018688'
Preparing user '2805572258'
Preparing user '18843357'
Preparing user '4399115542'
Preparing user '15074535'
Preparing user '1

Preparing user '714828156188930050'
Preparing user '1011169010'
Preparing user '21120959'
Preparing user '101945207'
Preparing user '636213'
Preparing user '43061708'
Preparing user '952409250'
Preparing user '1274770395871080453'
Preparing user '4859055741'
Preparing user '1188442910024523778'
Preparing user '1059428083'
Preparing user '288679166'
Preparing user '713370069422772228'
Preparing user '22548082'
Preparing user '1278877220589592577'
Preparing user '1264046059464142852'
Preparing user '1246544659440074752'
Preparing user '796267068026978304'
Preparing user '1008164845752147969'
Preparing user '1159829816381726726'
Preparing user '1272765199100116992'
Preparing user '895001616801083393'
Preparing user '613741718'
Preparing user '3552188003'
Preparing user '421564417'
Preparing user '172192086'
Preparing user '781862484357427204'
Preparing user '1172902312018202625'
Preparing user '1297315798839566337'
Preparing user '1323173112171634688'
Preparing user '1298092376443908097'


Preparing user '281684774'
Preparing user '44940175'
Preparing user '147054606'
Preparing user '910431486033219586'
Preparing user '1194043184902524931'
Preparing user '1306995110509453313'
Preparing user '33718657'
Preparing user '88545999'
Preparing user '1273346909093138433'
Preparing user '794365769987592192'
Preparing user '1316434066288279554'
Preparing user '1151353759513153537'
Preparing user '1947973256'
Preparing user '1145427015610376192'
Preparing user '2217802738'
Preparing user '293276217'
Preparing user '3237174011'
Preparing user '42686342'
Preparing user '811733301799043072'
Preparing user '2969960378'
Preparing user '2416746925'
Preparing user '1284224289949048835'
Preparing user '546872514'
Preparing user '2445728719'
Preparing user '1602206142'
Preparing user '579995287'
Preparing user '15473538'
Preparing user '356001558'
Preparing user '2153929366'
Preparing user '1269666360969265152'
Preparing user '1300108818286665729'
Preparing user '813913795848507392'
Prepari

Preparing user '55093843'
Preparing user '17348209'
Preparing user '726523094702149632'
Preparing user '1723358166'
Preparing user '982740968779796482'
Preparing user '1242142337444261894'
Preparing user '11164402'
Preparing user '16511661'
Preparing user '1322203826779283456'
Preparing user '1155679440493514752'
Preparing user '789450572697001984'
Preparing user '2792679654'
Preparing user '467386939'
Preparing user '1220862447927558146'
Preparing user '897173262'
Preparing user '889078033407033344'
Preparing user '99793549'
Preparing user '16335148'
Preparing user '1229799824767115264'
Preparing user '732383057072652289'
Preparing user '750561984635084800'
Preparing user '718358442315030528'
Preparing user '150750813'
Preparing user '1061478791216693248'
Preparing user '751113497031938052'
Preparing user '18842832'
Preparing user '2550707826'
Preparing user '28335862'
Preparing user '2825385169'
Preparing user '374052497'
Preparing user '2831481938'
Preparing user '366887043'
Prepari

Preparing user '1296301823511244802'
Preparing user '1281440918290206720'
Preparing user '34500045'
Preparing user '1201952235162423297'
Preparing user '805793599698903040'
Preparing user '292391727'
Preparing user '18201225'
Preparing user '1114651056'
Preparing user '1004909777409986561'
Preparing user '134877005'
Preparing user '256248462'
Preparing user '219055511'
Preparing user '1121639792276459520'
Preparing user '326654238'
Preparing user '1250788901645680640'
Preparing user '1021270826'
Preparing user '36449179'
Preparing user '1178849055587360768'
Preparing user '21428733'
Preparing user '1218532148703113216'
Preparing user '960339972405444609'
Preparing user '1250667348'
Preparing user '1287109483706163200'
Preparing user '37028819'
Preparing user '1257916383574122498'
Preparing user '1042947974983237632'
Preparing user '33958833'
Preparing user '1105439830408904705'
Preparing user '344938512'
Preparing user '68231209'
Preparing user '1321920152561242113'
Preparing user '245

Preparing user '1236707452059598848'
Preparing user '88431775'
Preparing user '808727314062262272'
Preparing user '4772169998'
Preparing user '2232201168'
Preparing user '964533818693767168'
Preparing user '36297942'
Preparing user '865309623737356290'
Preparing user '1162757906674126849'
Preparing user '3094744772'
Preparing user '50297444'
Preparing user '1206239026635825152'
Preparing user '1013232404771803138'
Preparing user '1259577156885241862'
Preparing user '36195441'
Preparing user '241669292'
Preparing user '3269524008'
Preparing user '1310605878563942401'
Preparing user '3031984573'
Preparing user '45918807'
Preparing user '1251880152814030849'
Preparing user '490634500'
Preparing user '78738051'
Preparing user '829032434222166017'
Preparing user '2362020218'
Preparing user '1248114281075200000'
Preparing user '1313231224341753856'
Preparing user '1138068164216967168'
Preparing user '2834683541'
Preparing user '26613944'
Preparing user '15572108'
Preparing user '750694935553

Preparing user '86438975'
Preparing user '267767046'
Preparing user '124368192'
Preparing user '777338504321851392'
Preparing user '1208532112426569728'
Preparing user '14234463'
Preparing user '1041520392786010114'
Preparing user '1029681661'
Preparing user '1306272776966340609'
Preparing user '1249368565'
Preparing user '841850430284021760'
Preparing user '164575454'
Preparing user '1289269825139662849'
Preparing user '1197345829968723968'
Preparing user '147414448'
Preparing user '783104359294959616'
Preparing user '870720265856200704'
Preparing user '1649171658'
Preparing user '1318189537705758720'
Preparing user '403847531'
Preparing user '1124524388'
Preparing user '16670695'
Preparing user '152223862'
Preparing user '1286104280278024192'
Preparing user '1150796964306534400'
Preparing user '1286323902134837249'
Preparing user '21561797'
Preparing user '60017979'
Preparing user '1184470357010350080'
Preparing user '1488633450'
Preparing user '3255402736'
Preparing user '1214296836

Preparing user '835014553750683648'
Preparing user '54666213'
Preparing user '951987936261758976'
Preparing user '825542715488219136'
Preparing user '1294707406887440385'
Preparing user '1134756847'
Preparing user '20637550'
Preparing user '971592830727569408'
Preparing user '412061173'
Preparing user '901164943'
Preparing user '3328134453'
Preparing user '1036767828010065920'
Preparing user '876674787115925504'
Preparing user '46228719'
Preparing user '25302125'
Preparing user '75184478'
Preparing user '1076288917761077248'
Preparing user '3020253431'
Preparing user '1159020755910373376'
Preparing user '1310927038396723206'
Preparing user '382356496'
Preparing user '959349781'
Preparing user '982428053526556672'
Preparing user '143784028'
Preparing user '4446309432'
Preparing user '1315089265043406848'
Preparing user '162441059'
Preparing user '1288118009127555073'
Preparing user '30901658'
Preparing user '1299200406686281729'
Preparing user '4537022834'
Preparing user '369692012'
Pre

Preparing user '1257120549865615361'
Preparing user '1177767069389078529'
Preparing user '851899495143133184'
Preparing user '701056144043671554'
Preparing user '110536434'
Preparing user '166530562'
Preparing user '1175408933298741248'
Preparing user '1174122046202241024'
Preparing user '805207939732688897'
Preparing user '1193089456426733568'
Preparing user '52008883'
Preparing user '3293372333'
Preparing user '1318239646749323265'
Preparing user '17535751'
Preparing user '1304020889252564992'
Preparing user '119252795'
Preparing user '1298623087735263232'
Preparing user '3087167792'
Preparing user '104315704'
Preparing user '1313862858464256000'
Preparing user '1141090089373536256'
Preparing user '964269647242543104'
Preparing user '4030328855'
Preparing user '1277671648246857730'
Preparing user '1268370735770112002'
Preparing user '27782029'
Preparing user '786420447000748032'
Preparing user '1705183802'
Preparing user '1925513276'
Preparing user '1164903131303600129'
Preparing use

Preparing user '983698103931990019'
Preparing user '31452027'
Preparing user '854953248'
Preparing user '585724885'
Preparing user '1444843555'
Preparing user '209693451'
Preparing user '1239614694232674305'
Preparing user '311226972'
Preparing user '1009435434'
Preparing user '3347960129'
Preparing user '1315082394345656320'
Preparing user '22337949'
Preparing user '346026870'
Preparing user '3294037163'
Preparing user '901262875200692224'
Preparing user '1141853377069891584'
Preparing user '92139823'
Preparing user '857586475617067008'
Preparing user '1707712147'
Preparing user '156388836'
Preparing user '393866934'
Preparing user '243577466'
Preparing user '1274899537597956097'
Preparing user '19905367'
Preparing user '49302783'
Preparing user '1052922578413776896'
Preparing user '2199706066'
Preparing user '25518162'
Preparing user '1263562328278171649'
Preparing user '202900984'
Preparing user '66844810'
Preparing user '617373769'
Preparing user '1290480836312096769'
Preparing use

Preparing user '2990355685'
Preparing user '1294696411036962817'
Preparing user '1301041976868036614'
Preparing user '785300589731164160'
Preparing user '450031130'
Preparing user '329688737'
Preparing user '16439054'
Preparing user '24657650'
Preparing user '1008932556006350849'
Preparing user '1271229580225937408'
Preparing user '106247243'
Preparing user '1147680736113094656'
Preparing user '1199333684446412801'
Preparing user '1215456592436482048'
Preparing user '374279367'
Preparing user '1296855903795322881'
Preparing user '24289574'
Preparing user '1267602574938701825'
Preparing user '1267247910858772481'
Preparing user '1299417348839964678'
Preparing user '880222808990785536'
Preparing user '861367165232062464'
Preparing user '1077628713565474816'
Preparing user '900290720291893249'
Preparing user '1129215895514034176'
Preparing user '1080967473422352384'
Preparing user '144358189'
Preparing user '27277972'
Preparing user '1314942954339958790'
Preparing user '2976653309'
Prepar

Preparing user '126791829'
Preparing user '1172732515448172544'
Preparing user '929670928556023808'
Preparing user '793162749450870784'
Preparing user '1150578554293379073'
Preparing user '1320295383151632390'
Preparing user '327505886'
Preparing user '1313902454988255233'
Preparing user '1207138325657554944'
Preparing user '72966788'
Preparing user '847941936770154496'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '845487333008982021'
Preparing user '1224113098920951808'
Preparing user '14939513'
Preparing user '1308228794017341440'
Preparing user '1305636935440961543'
Preparing user '382382579'
Preparing user '771156653093728256'
Preparing user '302381545'
Preparing user '443054901'
Preparing user '1613858982'
Preparing user '24765075'
Preparing user '2523965048'
Preparing user '1204419589'
Preparing user '1235527794706309121'
Preparing user '1292692613099790336'
Preparing user '1226149408607293443'
Preparing user '1241975445987504128'
Preparing u

Preparing user '1287577431033823232'
Preparing user '564674807'
Preparing user '173029778'
Preparing user '189599198'
Preparing user '1297902691662860293'
Preparing user '796633370512019456'
Preparing user '1096756622343303168'
Preparing user '319980481'
Preparing user '1179710990'
Preparing user '1199718871592161281'
Preparing user '1240543760091697153'
Preparing user '1511337289'
Preparing user '816728170002927616'
Preparing user '992128023578271745'
Preparing user '1087490806615822336'
Preparing user '20572543'
Preparing user '1321077638342037504'
Preparing user '800086251399319553'
Preparing user '4432222041'
Preparing user '1173707986038067200'
Preparing user '1273612436726321156'
Preparing user '11622712'
Preparing user '426257806'
Preparing user '1202393685268324353'
Preparing user '1304293262836903937'
Preparing user '756566119046115328'
Preparing user '1311664552333864968'
Preparing user '455402054'
Preparing user '1295377833632714755'
Preparing user '1299795722083201024'
Prep

Preparing user '15007299'
Preparing user '2294195549'
Preparing user '256113186'
Preparing user '1299090189881671682'
Preparing user '758566322376704000'
Preparing user '299866437'
Preparing user '721460186565611520'
Preparing user '1202991040648687621'
Preparing user '1245915638754709504'
Preparing user '2889709827'
Preparing user '885329795495731201'
Preparing user '23343780'
Preparing user '2445458406'
Preparing user '22846612'
Preparing user '1278305509922287618'
Preparing user '1021871507595816960'
Preparing user '164347969'
Preparing user '1282460926042681350'
Preparing user '470772685'
Preparing user '503789953'
Preparing user '101748208'
Preparing user '1254235982599159812'
Preparing user '2485647327'
Preparing user '1146614522523848705'
Preparing user '607405211'
Preparing user '3366293969'
Preparing user '920312283821158400'
Preparing user '1243353333282279424'
Preparing user '883198486824443904'
Preparing user '430809891'
Preparing user '1211263502179897344'
Preparing user '

Preparing user '341930987'
Preparing user '4739313036'
Preparing user '1275260721987645440'
Preparing user '747407531639775232'
Preparing user '3094704119'
Preparing user '120220928'
Preparing user '826126987173433344'
Preparing user '846440930'
Preparing user '212346643'
Preparing user '3427256506'
Preparing user '541542123'
Preparing user '3290669567'
Preparing user '1131539276498460672'
Preparing user '316063176'
Preparing user '876921172105068545'
Preparing user '2556603634'
Preparing user '703699662692360194'
Preparing user '395630587'
Preparing user '255824309'
Preparing user '486624141'
Preparing user '1128812596444966914'
Preparing user '2727518997'
Preparing user '489824177'
Preparing user '147017690'
Preparing user '379378754'
Preparing user '1265399166492127238'
Preparing user '1321279389024268291'
Preparing user '285076545'
Preparing user '278616909'
Preparing user '750008840939331585'
Preparing user '1413104335'
Preparing user '1315105427034533890'
Preparing user '95624069

Preparing user '891342748397318144'
Preparing user '711397136886145025'
Preparing user '2377336159'
Preparing user '14206115'
Preparing user '1219272445'
Preparing user '1243747408049098752'
Preparing user '1323308730209980417'
Preparing user '229274063'
Preparing user '197496309'
Preparing user '63162490'
Preparing user '1317695234940178432'
Preparing user '2413273436'
Preparing user '2760865966'
Preparing user '1248543036046442501'
Preparing user '1164856059732697088'
Preparing user '2616847636'
Preparing user '208863441'
Preparing user '785529566697656320'
Preparing user '3294485143'
Preparing user '1001774000597594112'
Preparing user '89536287'
Preparing user '1096958247557361669'
Preparing user '1164627598028226561'
Preparing user '453707098'
Preparing user '103477496'
Preparing user '26676436'
Preparing user '32804469'
Preparing user '32739250'
Preparing user '1323044076107739136'
Preparing user '90480218'
Preparing user '1041739676594462720'
Preparing user '962388305001271297'
P

Preparing user '1211875314621534208'
Preparing user '741376308'
Preparing user '797154744548651008'
Preparing user '427411903'
Preparing user '1042591638'
Preparing user '606146933'
Preparing user '1282027348276248576'
Preparing user '3174843730'
Preparing user '440464127'
Preparing user '15088605'
Preparing user '969740668380749824'
Preparing user '17943624'
Preparing user '2690955980'
Preparing user '23440586'
Preparing user '274274016'
Preparing user '1390597153'
Preparing user '266326338'
Preparing user '164150683'
Preparing user '1074979748'
Preparing user '372529808'
Preparing user '1230875656747208705'
Preparing user '1123369091878641664'
Preparing user '1318953209373839362'
Preparing user '372787472'
Preparing user '3067135782'
Preparing user '378072859'
Preparing user '2859023885'
Preparing user '473476604'
Preparing user '1219356108869001216'
Preparing user '4214519339'
Preparing user '712852680000073728'
Preparing user '964016566730928128'
Preparing user '893529009543426050'

Preparing user '1156275814868168704'
Preparing user '1277610572012498944'
Preparing user '15632337'
Preparing user '1304127684633649156'
Preparing user '788700982221893632'
Preparing user '3138946873'
Preparing user '1927985708'
Preparing user '459302182'
Preparing user '1241040541950803968'
Preparing user '1316890942079528960'
Preparing user '32864795'
Preparing user '1634742997'
Preparing user '1243576765936807937'
Preparing user '772691847802216448'
Preparing user '30837404'
Preparing user '1318234469921361921'
Preparing user '242421463'
Preparing user '1308192476767764480'
Preparing user '4385727183'
Preparing user '1242155368387657728'
Preparing user '842784818'
Preparing user '869785084479631360'
Preparing user '2334210600'
Preparing user '801303877'
Preparing user '1196213158433771523'
Preparing user '3771043037'
Preparing user '903313767596822528'
Preparing user '4861611379'
Preparing user '1317879361190694912'
Preparing user '248469208'
Preparing user '142096963'
Preparing use

Preparing user '1705391150'
Preparing user '1273505240050855938'
Preparing user '556358153'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '1159570378844323840'
Preparing user '979057500212400128'
Preparing user '1858435243'
Preparing user '384800892'
Preparing user '89535688'
Preparing user '630252907'
Preparing user '230796821'
Preparing user '46137585'
Preparing user '1281268155101392898'
Preparing user '1249270652156219392'
Preparing user '746034707209527296'
Preparing user '1129162895647023109'
Preparing user '310546956'
Preparing user '824808971261378560'
Preparing user '3308592113'
Preparing user '2810460354'
Preparing user '1272770048'
Preparing user '1317680521250435072'
Preparing user '759198419739090944'
Preparing user '201963182'
Preparing user '1322054991528546304'
Preparing user '1901740502'
Preparing user '1375536284'
Preparing user '1042763115207778304'
Preparing user '1316214703496941568'
Preparing user '73899524'
Preparing user '131

Preparing user '1233031329597599744'
Preparing user '2392247028'
Preparing user '1197177917211578370'
Preparing user '556480811'
Preparing user '823924739051835393'
Preparing user '1246202769662291969'
Preparing user '1084896359721910272'
Preparing user '703503924'
Preparing user '622286080'
Preparing user '1237385648035614720'
Preparing user '1322637393191669760'
Preparing user '2389419156'
Preparing user '969635344462344194'
Preparing user '23023965'
Preparing user '234773300'
Preparing user '1287969203748732931'
Preparing user '305291107'
Preparing user '1091141124574638080'
Preparing user '1257051224'
Preparing user '1207435373850189826'
Preparing user '1186275111592222723'
Preparing user '3871502234'
Preparing user '1089937044435030016'
Preparing user '592945059'
Preparing user '1248018965881896961'
Preparing user '879521086534189056'
Preparing user '4190667705'
Preparing user '2340995202'
Preparing user '1245057759768444938'
Preparing user '3193046374'
Preparing user '3325778140'

Preparing user '920087392048046080'
Preparing user '327491528'
Preparing user '1261681896506880000'
Preparing user '564044174'
Preparing user '2838159825'
Preparing user '64829826'
Preparing user '1061989359744925697'
Preparing user '701815183170113536'
Preparing user '1224546660791017472'
Preparing user '414010452'
Preparing user '792450594661404672'
Preparing user '3680034562'
Preparing user '36569231'
Preparing user '18272274'
Preparing user '605415583'
Preparing user '785284455770497024'
Preparing user '1163458704915730432'
Preparing user '368778613'
Preparing user '136717052'
Preparing user '1316111836664737793'
Preparing user '839962012964835328'
Preparing user '462451128'
Preparing user '291935994'
Preparing user '4031877857'
Preparing user '1216574102720385024'
Preparing user '75852634'
Preparing user '1066346133386141698'
Preparing user '111486145'
Preparing user '1279586549991198720'
Preparing user '1298963325078900736'
Preparing user '883343376187351040'
Preparing user '3008

In [None]:
# Close the `csv_file` handle that was opened in an earlier cell (not shown
# in this part of the notebook). Presumably this is the per-user stats CSV
# written during the lookup loop -- TODO confirm against the opening cell.
csv_file.close()

In [None]:
# Display the `bot_probas` dict for a quick visual check. It is mapped over
# `data['user_id']` in the next cell, so its keys are user ids; the values
# are presumably the model's bot probabilities -- TODO confirm.
bot_probas

In [None]:
# Attach the per-user lookup results to every tweet row, then persist the
# labelled frame. Both lookups are dicts keyed by user_id; Series.map leaves
# NaN for any user_id that has no entry (e.g. users not yet processed).
data['bot_proba'] = data['user_id'].map(bot_probas)
# Map through the `verifieds` dict that is initialised alongside `bot_probas`.
# The previous mapper, `is_verified`, is not defined in any visible cell of
# this notebook, so the original line would fail (or call something else).
# NOTE(review): a later cell stores the same mapping under the column name
# 'verified'; confirm which column name downstream code expects.
data['verifieds'] = data['user_id'].map(verifieds)
data.to_pickle("pickle/tw_proba_verif.pick")

In [None]:
# Display the `verifieds` dict for a quick visual check. It is mapped over
# `data['user_id']` in the next cell, so its keys are user ids; the values
# are presumably each account's verified flag -- TODO confirm.
verifieds

In [None]:
# Broadcast each per-user lookup dict onto the tweet rows via user_id.
# Order is kept (verified first, then bot_proba) so new columns are appended
# in the same sequence as before.
for _column, _lookup in (('verified', verifieds), ('bot_proba', bot_probas)):
    data[_column] = data['user_id'].map(_lookup)
