## Implementing Chris Doenlen's 'Bot Or Not' Python Module

Everything from `twitter_funcs.py` was cloned from Chris' [repository](https://github.com/scrapfishies/twitter-bot-detection).

I will use this to label each user as 'bot' (boolean 1/0).

In [1]:
import pandas as pd
import numpy as np
from copy import deepcopy
from twitter_funcs import *

# Imports
import os
import numpy as np
import pandas as pd

import pickle
import tweepy

from datetime import datetime
from secrets import api_secret_key, api_key, bearer_token
import re
import time
import csv

In [2]:
data = pd.read_pickle("pickle/n2_tokenized.pick")

In [3]:
data.head(3)

Unnamed: 0,trump,biden,hashtags,user_id,original,tweet
181142,False,True,[],2820503362,All these articles showing that Biden is in th...,showing joe_biden lead ignore still vote showi...
0,False,True,[],1312487180258820096,@FoxNews Lady Gaga’s a nobody. Can’t figure ou...,lady nobody figure life even see nobody help j...
4,False,True,[],2335763630,@The_Grupp “It is purely a fortuity that this ...,purely fortuity great mass casualty history jo...


I'm getting a rate limit error. according to twitter site, I can lookup 300 users per 15 minutes. Let's try that.

In [4]:
def chunks(user_ids, n):
    """Yield successive n-sized chunks from user_ids (iterable)."""
    lst = list(user_ids)
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
    


Here's where we'll implement Chris Doenlen's 'Bot or Not' model.

In [5]:
with open("bot_model.pick", "rb") as read_file:
    xgb_model = pickle.load(read_file)

In [6]:
# time.sleep(60*15)

In [None]:
verifieds = {}
bot_probas = {}

all_users = list(data.user_id.unique())
print(f"Total number of users to scrape: {len(all_users)}")
exist = pd.read_csv("../data/user_stats.csv")
exist_users = list(exist.user_id.unique())
print(f"Number of users already scraped: {len(exist_users)}")

user_ids = []
for user in exist_users:
    if user in all_users:
        continue
    user_ids.append(user)
    
print(f"Preparing to identify bots for {len(user_ids)} users...")

user_id_chunks = list(chunks(user_ids, n=300))

# now get stats for new users

csv_file = open("../data/user_stats.csv", "a")
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["user_id", "bot_proba", "verified"])
for chunk in user_id_chunks:
    print(f"Preparing chunk. Num users: {len(chunk)}")
    for user_id in chunk:
        print(f"Preparing user '{user_id}'")
        
        
        auth = tweepy.OAuthHandler(api_key, api_secret_key)
        api = tweepy.API(auth)
        
        try: # Gather features for bot/not bot model
            # Get user information from screen name
            user = api.get_user(user_id)

            # account features to return for predicton
            account_age_days = (datetime.now() - user.created_at).days
            verified = user.verified # will also use this in our data
            geo_enabled = user.geo_enabled
            default_profile = user.default_profile
            default_profile_image = user.default_profile_image
            favourites_count = user.favourites_count
            followers_count = user.followers_count
            friends_count = user.friends_count
            statuses_count = user.statuses_count
            average_tweets_per_day = np.round(statuses_count / account_age_days, 3)

            # manufactured features
            hour_created = int(user.created_at.strftime("%H"))
            network = np.round(np.log(1 + friends_count) * np.log(1 + followers_count), 3)
            tweet_to_followers = np.round(
                np.log(1 + statuses_count) * np.log(1 + followers_count), 3
            )
            follower_acq_rate = np.round(
                np.log(1 + (followers_count / account_age_days)), 3
            )
            friends_acq_rate = np.round(np.log(1 + (friends_count / account_age_days)), 3)

            # organizing list to be returned
            account_features = [
                verified, hour_created,geo_enabled,default_profile,default_profile_image,favourites_count,
                followers_count,friends_count,statuses_count,average_tweets_per_day,network,tweet_to_followers,
                follower_acq_rate,friends_acq_rate]

            if account_features == np.nan:
                proba = np.nan
                verified = np.nan
                csv_writer.writerow([user_id, proba, verified])
                continue

            else:
                user_m = np.matrix(account_features)
                proba = np.round(xgb_model.predict_proba(user_m)[:, 1][0] * 100, 2)
                verified = account_features[0]
                csv_writer.writerow([user_id, proba, verified])

        except:
            print(f'error encountered, skipping user {user_id}')
            proba = np.nan
            verified = np.nan
        
            csv_writer.writerow([user_id, proba, verified])
    print("Chunk complete. Waiting 15 minutes.")
    time.sleep(15*60+1)

csv_file.close()

Total number of users to scrape: 34383
Number of users already scraped: 20101
Preparing to identify bots for 13541 users...
Preparing chunk. Num users: 300
Preparing user '105937456'
Preparing user '46538055'
Preparing user '100625142'
Preparing user '1288981025364860928'
Preparing user '65067002'
Preparing user '2562614485'
Preparing user '2246207654'
Preparing user '1066144442241953792'
Preparing user '1007960516'
Preparing user '1118489465008336897'
Preparing user '1206954619789856769'
Preparing user '1243870145229008896'
Preparing user '1658455681'
Preparing user '989298900'
Preparing user '1267800950321876994'
Preparing user '807989050972000256'
Preparing user '838183322'
Preparing user '893684694759362561'
Preparing user '1282581117632000000'
Preparing user '709149862227484672'
Preparing user '3071943960'
Preparing user '761111078201810944'
Preparing user '757727571807051776'
Preparing user '1194715921908023296'
Preparing user '143975787'
Preparing user '2952274270'
Preparing use

Preparing user '1133485857841848320'
Preparing user '1171412312999288832'
Preparing user '2787215427'
Preparing user '500289229'
Preparing user '2213540755'
Preparing user '1840457510'
Preparing user '824171096'
Preparing user '821029492030377984'
Preparing user '3158618551'
Preparing user '898218643888197632'
Preparing user '97413558'
Preparing user '59810886'
Preparing user '2986724116'
Preparing user '1201176493'
Preparing user '812919639894331392'
Preparing user '1229203145676320769'
Preparing user '74333189'
Preparing user '16843499'
Preparing user '2380623400'
Preparing user '1257544806185799680'
Preparing user '1253818448410316802'
Preparing user '1290427170872066053'
Preparing user '1201662807860756480'
Preparing user '758569471351070720'
Preparing user '277524205'
Preparing user '1247962815035510784'
Preparing user '44748304'
Preparing user '3084606262'
Preparing user '1274357130427056129'
Preparing user '16458709'
Preparing user '195645207'
Preparing user '615915855'
Preparin

Preparing user '2244706798'
Preparing user '3177009068'
Preparing user '249721328'
Preparing user '2772901864'
Preparing user '2936945615'
Preparing user '992188848770822145'
Preparing user '2617596283'
Preparing user '25790437'
Preparing user '1254895741690163200'
Preparing user '415142756'
Preparing user '1335059035'
Preparing user '2259260936'
Preparing user '978307014605639680'
Preparing user '1460958716'
Preparing user '1548506839'
Preparing user '1323229114111844353'
Preparing user '709103640062066688'
Preparing user '1300177071071690753'
Preparing user '4319693115'
Preparing user '2291120303'
Preparing user '3991537240'
Preparing user '139596582'
Preparing user '1262830448176234497'
Preparing user '30500949'
Preparing user '1296152764280709121'
Preparing user '31169435'
Preparing user '2863437856'
Preparing user '1061410712831045632'
Preparing user '1311862962903298049'
Preparing user '1337922428'
Preparing user '49478496'
Preparing user '1087520123089575937'
Preparing user '979

Preparing user '19604028'
Preparing user '1049474297134223360'
Preparing user '2722275065'
Preparing user '26042009'
Preparing user '1217173636634808321'
Preparing user '1218032038386769920'
Preparing user '713461348034813956'
Preparing user '91981041'
Preparing user '115071051'
Preparing user '122254934'
Preparing user '1299152836182245376'
Preparing user '3926587454'
Preparing user '862154071536312320'
Preparing user '1070096619314524162'
Preparing user '125502420'
Preparing user '1283547777948160001'
Preparing user '2361817734'
Preparing user '944758932442157056'
Preparing user '1597550012'
Preparing user '559718461'
Preparing user '1004491234683994112'
Preparing user '1239351932550107136'
Preparing user '1046130110'
Preparing user '1852601298'
Preparing user '2163022478'
Preparing user '1293531609933721600'
Preparing user '133088013'
Preparing user '1403361253'
Preparing user '2574582585'
Preparing user '747854636476948481'
Preparing user '120839423'
Preparing user '344453560'
Prep

Preparing user '834030626185572353'
Preparing user '866749606754910208'
Preparing user '1315050522399903746'
Preparing user '1313850516208840704'
Preparing user '924776360'
Preparing user '970894987264217088'
Preparing user '3503381716'
Preparing user '1072572644036358146'
Preparing user '1299863840432160769'
Preparing user '10221'
Preparing user '1140049316184317953'
Preparing user '1197284111007830016'
Preparing user '43429285'
Preparing user '936810972551024640'
Preparing user '194681383'
Preparing user '1030301449547210752'
Preparing user '1314641883235258373'
Preparing user '773451838939099137'
Preparing user '3369255861'
Preparing user '31326363'
Preparing user '2634695718'
Preparing user '863982793319215106'
Preparing user '801362359877009408'
Preparing user '163657957'
Preparing user '1115505571908804610'
Preparing user '1182070033587036162'
Preparing user '874339847439355907'
Preparing user '1289418249222807552'
Preparing user '1320899712103440387'
Preparing user '889033997942

Preparing user '1170169525393854464'
Preparing user '1285693469734981638'
Preparing user '2842833556'
Preparing user '116637128'
Preparing user '86378821'
Preparing user '112958063'
Preparing user '1029687140'
Preparing user '152401104'
Preparing user '34820804'
Preparing user '1146030692658118657'
Preparing user '25204737'
Preparing user '335680163'
Preparing user '889358351762673664'
Preparing user '969270963979345921'
Preparing user '435498614'
Preparing user '1223977744318386176'
Preparing user '319383173'
Preparing user '57688799'
Preparing user '824674093551087616'
Preparing user '4487950754'
Preparing user '2914025376'
Preparing user '1319442621929062400'
Preparing user '388602014'
Preparing user '808493729728241664'
Preparing user '2251183430'
Preparing user '3381595551'
Preparing user '200908578'
Preparing user '3018201986'
Preparing user '196027821'
Preparing user '1294568924504260608'
Preparing user '1241576251'
Preparing user '2361832015'
Preparing user '1276324518169018370

Preparing user '200525816'
Preparing user '1229077473830137857'
Preparing user '1267621773366919169'
Preparing user '792562460'
Preparing user '589650388'
Preparing user '1110268747074408449'
Preparing user '1239224447187456001'
Preparing user '1307016262401183745'
Preparing user '111791598'
Preparing user '856952214052626433'
Preparing user '3258211885'
Preparing user '2373271256'
Preparing user '1175079112102371328'
Preparing user '335352825'
Preparing user '333170394'
Preparing user '712402199914090496'
Preparing user '782384588106629120'
Preparing user '633402808'
Preparing user '252650326'
Preparing user '938230728084611072'
Preparing user '501583288'
Preparing user '1003623831054049281'
Preparing user '974890999'
Preparing user '1096192890592788480'
Preparing user '2929428898'
Preparing user '610045909'
Preparing user '822793883142721536'
Preparing user '1656298045'
Preparing user '750181981'
Preparing user '1205526166662262786'
Preparing user '1251384846091341829'
Preparing user

Preparing user '244900398'
Preparing user '1184964035126546432'
Preparing user '13117642'
Preparing user '58600561'
Preparing user '705284035019460608'
Preparing user '25967290'
Preparing user '1210310224734806023'
Preparing user '1255305312250277890'
Preparing user '1206589313351327746'
Preparing user '1320193196601667584'
Preparing user '1242945424186638340'
Preparing user '999441570188292096'
Preparing user '1319517462203777024'
Preparing user '522384942'
Preparing user '2287226102'
Preparing user '862397514833592321'
Preparing user '1216899125901393921'
Preparing user '1556748049'
Preparing user '47476339'
Preparing user '1266888962821554176'
Preparing user '1323315923248320512'
Preparing user '1209469566662926336'
Preparing user '28633302'
Preparing user '1917731'
Preparing user '264159222'
Preparing user '26020465'
Preparing user '403023454'
Preparing user '16312576'
Preparing user '804098150650646530'
Preparing user '1025178481074819072'
Preparing user '1298491472057171970'
Prep

Preparing user '1266256461144834049'
Preparing user '1290410454532198401'
Preparing user '1229393297707347968'
Preparing user '931322034470227968'
Preparing user '821399873518006278'
Preparing user '1225930531'
Preparing user '1296588380155916288'
Preparing user '1287239077423714305'
Preparing user '2939227271'
Preparing user '815745130078445568'
Preparing user '1275519561610285056'
Preparing user '406414418'
Preparing user '3655673595'
Preparing user '453191260'
Preparing user '1032282225444839424'
Preparing user '1229048541936529408'
Preparing user '15724160'
Preparing user '1293157441509695490'
Preparing user '862992242'
Preparing user '498018121'
Preparing user '848621029962301440'
Preparing user '570247683'
Preparing user '1287551670830772224'
Preparing user '1088584837513048065'
Preparing user '334798322'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '35917152'
Preparing user '27512314'
Preparing user '2392294124'
Preparing user '773874589'
Pr

Preparing user '406276049'
Preparing user '1005593216966590464'
Preparing user '122729362'
Preparing user '1262342226'
Preparing user '1320867042782752770'
Preparing user '59553418'
Preparing user '1215464293879009280'
Preparing user '3004083173'
Preparing user '1283141803408011264'
Preparing user '1050933823704322048'
Preparing user '1179942791566254081'
Preparing user '1549463612'
Preparing user '18887201'
Preparing user '250729259'
Preparing user '811367821'
Preparing user '16261416'
Preparing user '1273756612994445315'
Preparing user '1311451225683095552'
Preparing user '2890551649'
Preparing user '2665006954'
Preparing user '484296387'
Preparing user '21546203'
Preparing user '3400861672'
Preparing user '1281231424503164929'
Preparing user '1305327097108459521'
Preparing user '2296706257'
Preparing user '1026954914'
Preparing user '1168281614'
Preparing user '926578068'
Preparing user '1300537503041060864'
Preparing user '610980533'
Preparing user '1077039234580545536'
Preparing u

Preparing user '484100085'
Preparing user '795010354799636480'
Preparing user '709373688'
Preparing user '1107725463953256454'
Preparing user '152900581'
Preparing user '1316883623954518024'
Preparing user '1021764695085203456'
Preparing user '225958057'
Preparing user '75683228'
Preparing user '289042386'
Preparing user '722976374764527616'
Preparing user '28908312'
Preparing user '1723862714'
Preparing user '1188257001333092353'
Preparing user '233657713'
Preparing user '853699429'
Preparing user '923256849461399552'
Preparing user '338040885'
Preparing user '1279063824016871424'
Preparing user '126136711'
Preparing user '17348209'
Preparing user '726523094702149632'
Preparing user '982740968779796482'
Preparing user '1242142337444261894'
Preparing user '11164402'
Preparing user '16511661'
Preparing user '1322203826779283456'
Preparing user '1155679440493514752'
Preparing user '789450572697001984'
Preparing user '467386939'
Preparing user '1220862447927558146'
Preparing user '8971732

Preparing user '1096374256412311554'
Preparing user '744130280'
Preparing user '314544686'
Preparing user '1229758297990934529'
Preparing user '750285599718989824'
Preparing user '336378370'
Preparing user '1321969356574699525'
Preparing user '1088285702297505792'
Preparing user '1006023543002423297'
Preparing user '212806156'
Preparing user '403095158'
Preparing user '147807232'
Preparing user '1095459895816667136'
Preparing user '941462965160632320'
Preparing user '1218141761639460864'
Preparing user '739994934133067776'
Preparing user '2400829158'
Preparing user '912836471278702598'
Preparing user '269998510'
Preparing user '234582492'
Preparing user '735694282741284864'
Preparing user '80954413'
Preparing user '537725708'
Preparing user '1172988543012093953'
Preparing user '957994813202337794'
Preparing user '4896705077'
Preparing user '1248822636932546560'
Preparing user '826619048'
Preparing user '4892456343'
Preparing user '1121902733689335808'
Preparing user '1460903310'
Prepar

Preparing user '1306039518542397440'
Preparing user '1033529575316369408'
Preparing user '238706137'
Preparing user '39983401'
Preparing user '1261468378205650948'
Preparing user '52943120'
Preparing user '1277674412964110336'
Preparing user '920988486'
Preparing user '4872219082'
Preparing user '937839035816476674'
Preparing user '1026311432894402561'
Preparing user '1291579022028275713'
Preparing user '857117473'
Preparing user '44401542'
Preparing user '705936824280043520'
Preparing user '1292975225651380224'
Preparing user '4643793614'
Preparing user '1020469511038939137'
Preparing user '5958122'
Preparing user '253683382'
Preparing user '2826118047'
Preparing user '1019695664094109697'
Preparing user '997621789848059904'
Preparing user '711639814525874178'
Preparing user '12469692'
Preparing user '1139348340569714689'
Preparing user '2328070701'
Preparing user '54482352'
Preparing user '1201464973081362432'
Preparing user '14236800'
Preparing user '1278213166950871040'
Preparing u

Preparing user '1290324690389565441'
Preparing user '255665581'
Preparing user '281334085'
Preparing user '1012262622526410753'
Preparing user '308033771'
Preparing user '112280016'
Preparing user '182798155'
Preparing user '2393098723'
Preparing user '454515801'
Preparing user '2194899020'
Preparing user '1273778068885254144'
Preparing user '3896205508'
Preparing user '1134408978752917504'
Preparing user '1199834603348987904'
Preparing user '797289762512138240'
Preparing user '1258853622236033024'
Preparing user '248794070'
Preparing user '34788803'
Preparing user '860157576364863488'
Preparing user '1226666246206377984'
Preparing user '746757059429941248'
Preparing user '27998211'
Preparing user '432071674'
Preparing user '67531083'
Preparing user '1231600418918223873'
Preparing user '471905581'
Preparing user '32297425'
Preparing user '48940256'
Preparing user '421572531'
Preparing user '1213653380226154497'
Preparing user '299130301'
Preparing user '1968011196'
Preparing user '3804

Preparing user '30332775'
Preparing user '31029596'
Preparing user '1598812760'
Preparing user '349186265'
Preparing user '208753892'
Preparing user '1281834634331000832'
Preparing user '16449729'
Preparing user '1272352341979774976'
Preparing user '815607601484484608'
Preparing user '63164487'
Preparing user '1339399483'
Preparing user '3153110927'
Preparing user '1328407309'
Preparing user '230357585'
Preparing user '569623730'
Preparing user '1270825010479988736'
Preparing user '570202211'
Preparing user '831282290940637185'
Preparing user '742042952369725440'
Preparing user '2471222959'
Preparing user '1393504592'
Preparing user '240418265'
Preparing user '1319466360175513600'
Preparing user '747273285411315713'
Preparing user '2230896236'
Preparing user '1155632827377094656'
Preparing user '1316562949515948032'
Preparing user '1322601468084412421'
Preparing user '40831938'
Preparing user '983698103931990019'
Preparing user '854953248'
Preparing user '1444843555'
Preparing user '20

Preparing user '206870571'
Preparing user '1041141397418647552'
Preparing user '1059592291374022657'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '1132419274365120527'
Preparing user '874834131821092864'
Preparing user '16271756'
Preparing user '2653903479'
Preparing user '771551535935262722'
Preparing user '1302398486235684865'
Preparing user '1265398436599410691'
Preparing user '206086913'
Preparing user '4231753156'
Preparing user '1148690147296178177'
Preparing user '1289284003095347200'
Preparing user '17593609'
Preparing user '346132372'
Preparing user '372220090'
Preparing user '264961704'
Preparing user '31029074'
Preparing user '2330796883'
Preparing user '30075085'
Preparing user '908919812163817472'
Preparing user '583568522'
Preparing user '1928439278'
Preparing user '1216757035267588096'
Preparing user '785983309578764288'
Preparing user '174865318'
Preparing user '96685181'
Preparing user '925764449407029248'
Preparing user '130232404

Preparing user '2981813110'
Preparing user '29632025'
Preparing user '3098383531'
Preparing user '387373139'
Preparing user '532673885'
Preparing user '95485773'
Preparing user '971218637347418113'
Preparing user '1238542769464975360'
error encountered, skipping user 1238542769464975360
Preparing user '716643162739965952'
Preparing user '75337539'
Preparing user '218063694'
Preparing user '2177380997'
Preparing user '954516699751075842'
Preparing user '971122621'
Preparing user '21140480'
Preparing user '2800100281'
Preparing user '1265072609131032576'
Preparing user '725443217097953280'
Preparing user '1036469966'
Preparing user '3037205607'
Preparing user '1232677701921779714'
Preparing user '3303969920'
Preparing user '168028018'
Preparing user '16826186'
Preparing user '22628621'
Preparing user '148082236'
Preparing user '49024491'
Preparing user '4476436276'
Preparing user '4122803194'
Preparing user '409868039'
Preparing user '22283899'
Preparing user '2571831456'
Preparing user 

Preparing user '1216425494796800001'
Preparing user '277997059'
Preparing user '1317825621687439362'
Preparing user '1029784140'
Preparing user '499369093'
Preparing user '1271563679582924802'
Preparing user '2861699530'
Preparing user '3280277911'
Preparing user '290948075'
Preparing user '1005327478200528896'
Preparing user '826569252962496513'
Preparing user '2360141425'
Preparing user '1322376216683421697'
Preparing user '3282486619'
Preparing user '37771543'
Preparing user '1288637920342167556'
Preparing user '1290088476961132544'
Preparing user '1023136526'
Preparing user '868042119721738240'
Preparing user '873634388235268096'
Preparing user '1861830169'
Preparing user '340328431'
Preparing user '1117606113535516672'
Preparing user '28223588'
Preparing user '124091966'
Preparing user '895973879226740736'
Preparing user '1135082421529919488'
Preparing user '888033951352336384'
Preparing user '1267926649041031172'
Preparing user '1180243613986496512'
Preparing user '12531413383312

Preparing user '823260101238722560'
Preparing user '17469072'
Preparing user '22875500'
Preparing user '19742910'
Preparing user '1166944443196411904'
Preparing user '1032803241246044160'
Preparing user '2995106073'
Preparing user '21540170'
Preparing user '96870081'
Preparing user '123394078'
Preparing user '879341987861852160'
Preparing user '1152376195088048128'
Preparing user '1120846519794634752'
Preparing user '107514431'
Preparing user '782247601487814657'
Preparing user '303471089'
Preparing user '1061316745569452033'
Preparing user '15440802'
Preparing user '49535307'
Preparing user '4183713737'
Preparing user '1001975662373163008'
Preparing user '165572762'
Preparing user '576801455'
Preparing user '889941713824960513'
Preparing user '1524790842'
Preparing user '1246262039070347272'
Preparing user '1294807637549178880'
Preparing user '18011146'
Preparing user '830658335053664258'
Preparing user '39138581'
Preparing user '292696105'
Preparing user '760983317206491136'
Preparin

Preparing user '1248625509744791553'
Preparing user '152666403'
Preparing user '785855476068212738'
Preparing user '1288290605857034240'
error encountered, skipping user 1288290605857034240
Preparing user '323459723'
Preparing user '880298688282845190'
Preparing user '1273408170011000834'
Preparing user '66143876'
Preparing user '64967211'
Preparing user '24288286'
Preparing user '122756779'
Preparing user '1298482669605523458'
Preparing user '729060957448769536'
Preparing user '71872365'
Preparing user '1108773974937427968'
Preparing user '1155244835936317440'
Preparing user '39804437'
Preparing user '1130144680484769794'
Preparing user '1303515122166427648'
Preparing user '1130837530633490432'
Preparing user '3401689749'
Preparing user '364570975'
Preparing user '1311746214870777858'
Preparing user '1279762210143834113'
Preparing user '1205203093337579521'
Preparing user '1314032858260889600'
Preparing user '44802806'
Preparing user '1403305032'
Preparing user '2183854060'
Preparing 

Preparing user '786677101365723136'
Preparing user '865653796441006080'
Preparing user '1283637451211149312'
Preparing user '1322792577419014144'
Preparing user '1298974744046448641'
Preparing user '853660787361157120'
Preparing user '1282714320133066752'
Preparing user '348687298'
Preparing user '80944618'
Preparing user '1322282931335868418'
Preparing user '715640726286163968'
Preparing user '18748155'
error encountered, skipping user 18748155
Preparing user '816507866462978048'
Preparing user '715192278'
Preparing user '1284613539806752773'
Preparing user '1252034726937976833'
Preparing user '1318974653768081409'
Preparing user '566644612'
Preparing user '836725542992896000'
Preparing user '342496730'
Preparing user '1296461037412257796'
Preparing user '90735739'
Preparing user '843399737428516864'
Preparing user '198495477'
Preparing user '1305828122'
Preparing user '1202757891737956352'
Preparing user '4257692487'
Preparing user '400811591'
Preparing user '1194359122457706496'
Pre

Preparing user '260810975'
Preparing user '1106329410439770112'
Preparing user '1239490145801449477'
Preparing user '2875912888'
Preparing user '942474052567957506'
Preparing user '926710934'
Preparing user '1268449197457637377'
Preparing user '1202777911536816128'
Preparing user '736418946300010496'
Preparing user '897846669659840512'
Preparing user '1211037766923902977'
Preparing user '749349960232136705'
Preparing user '1161991474566840321'
Preparing user '232185551'
Preparing user '1227699503718969345'
Preparing user '74775558'
Preparing user '172059661'
Preparing user '1246388675090550785'
Preparing user '736058641535508487'
Preparing user '784583038294433792'
Preparing user '266838334'
Preparing user '61235984'
Preparing user '1292614733170868226'
Preparing user '1317847607356592128'
Preparing user '41789786'
Preparing user '1277954717604122630'
Preparing user '204477027'
Preparing user '895683531892035584'
Preparing user '1312702980391669763'
Preparing user '273661134'
Preparing

Preparing user '14707316'
Preparing user '2479976205'
Preparing user '931352462551691264'
Preparing user '2221198059'
Preparing user '98698876'
Preparing user '1264902037940121601'
Preparing user '220053608'
Preparing user '1129045502346055680'
Preparing user '3230119200'
Preparing user '1009945881016193024'
Preparing user '825143150'
Preparing user '31220522'
Preparing user '2991030635'
Preparing user '731814427146764290'
Preparing user '23005399'
Preparing user '1049086147'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '956209272102313989'
Preparing user '1161479181504516097'
Preparing user '19488406'
Preparing user '84690524'
Preparing user '730964226001408001'
Preparing user '741108596742586369'
Preparing user '179693229'
Preparing user '1240293787290779648'
Preparing user '888416128493510656'
Preparing user '242976501'
Preparing user '816523514190630912'
Preparing user '1183616394237227009'
Preparing user '630420208'
Preparing user '13144132135

Preparing user '1258031785558114305'
Preparing user '1239730265045356544'
Preparing user '1267990404487688194'
Preparing user '4070272397'
Preparing user '3222981440'
Preparing user '769817286'
Preparing user '1110979319247978496'
Preparing user '229916769'
Preparing user '1166517798149021696'
Preparing user '1046209208377651201'
Preparing user '164407049'
Preparing user '306908440'
Preparing user '4694481390'
Preparing user '2362722846'
Preparing user '2366419128'
Preparing user '1086100222512713728'
Preparing user '902577288948195328'
Preparing user '1244778258610229256'
Preparing user '68565801'
Preparing user '1075840425284526081'
Preparing user '37197330'
Preparing user '372489917'
Preparing user '26298021'
Preparing user '15600924'
Preparing user '1190903521467609088'
Preparing user '12844542'
Preparing user '1576421438'
Preparing user '2725417308'
Preparing user '1205284856806031361'
Preparing user '754719311558680576'
Preparing user '283964322'
Preparing user '12973173122869739

Preparing user '1314139638676828162'
Preparing user '63296156'
Preparing user '1261056740453482496'
Preparing user '2877179937'
Preparing user '1267568150700265487'
Preparing user '1214307942033326080'
Preparing user '3188621379'
Preparing user '1213283962296000512'
Preparing user '702640519273451520'
Preparing user '1160273500109758471'
Preparing user '509506361'
Preparing user '1285617809918222339'
Preparing user '1306832864659472384'
Preparing user '942597487'
Preparing user '2979994667'
Preparing user '2255610622'
Preparing user '1101466473762033665'
Preparing user '1294097042218852357'
Preparing user '233020172'
Preparing user '752163186'
Preparing user '604058092'
Preparing user '2932596425'
Preparing user '1263829197702729728'
Preparing user '1301265822493671424'
Preparing user '1043935693947502598'
Preparing user '3111736206'
Preparing user '757098344254627840'
Preparing user '959557602786119682'
Preparing user '1308956347376054272'
Preparing user '138784681'
Preparing user '13

Preparing user '1256337486860759040'
Preparing user '1329089688'
Preparing user '1108439289640243201'
Preparing user '1205549741876101128'
Preparing user '1260004523772456961'
Preparing user '43380834'
Preparing user '948396406242357249'
Preparing user '748022006'
Preparing user '258561688'
Preparing user '82946823'
Preparing user '955255904517029888'
Preparing user '281715071'
Preparing user '1238199487430082560'
Preparing user '140078045'
Preparing user '869955834935222272'
Preparing user '1265128764259868674'
Preparing user '2453430987'
Preparing user '1281289917910392837'
Preparing user '44632333'
Preparing user '3057756036'
Preparing user '1108829059071512579'
Preparing user '2878864727'
Preparing user '820644391069118464'
Preparing user '50651755'
Preparing user '2815792260'
Preparing user '1302423334118858752'
Preparing user '1016343301'
Preparing user '1183934538214342657'
Preparing user '47213480'
Preparing user '64464384'
Preparing user '15794667'
Preparing user '977191958'
P

Preparing user '3503520329'
Preparing user '1037898520093765634'
Preparing user '1313709495298654208'
Preparing user '1283545155962798081'
Preparing user '16075261'
Preparing user '1220349937302622210'
Preparing user '876109069312032768'
Preparing user '3145661947'
Preparing user '840066289'
Preparing user '1172317169557749760'
Preparing user '3341156470'
Preparing user '1062419581027917825'
Preparing user '1090788480'
Preparing user '1291576230937006080'
Preparing user '3067763790'
Preparing user '1219310372445990913'
Preparing user '30067748'
Preparing user '1033906579987357697'
Preparing user '30561416'
Preparing user '23668686'
Preparing user '45204479'
Preparing user '1322171952627359751'
Preparing user '1873554506'
Preparing user '222236681'
Preparing user '1213541391634034688'
Preparing user '17784283'
Preparing user '594322540'
Preparing user '1256404142991392768'
Preparing user '124618195'
Preparing user '1091156642123649024'
Preparing user '330308097'
Preparing user '18703040

Preparing user '32964416'
Preparing user '343123004'
Preparing user '535559749'
Preparing user '704826322351030272'
Preparing user '20830019'
Preparing user '34752746'
Preparing user '3708502641'
Preparing user '2961185536'
Preparing user '1034652038653784065'
Preparing user '1143133000659431425'
Preparing user '1304968900417925123'
Preparing user '857132329'
Preparing user '1063766958'
Preparing user '1012356378'
Preparing user '41634520'
Preparing user '260865201'
Preparing user '1152620064463568896'
Preparing user '851927779952218118'
Preparing user '1071198220422930432'
Preparing user '1223407321906180098'
Preparing user '791844638780854272'
Preparing user '1276839799542996993'
Preparing user '821087517961228290'
Preparing user '1311791893957816320'
Preparing user '81891226'
Preparing user '1177269044208119808'
Preparing user '29359131'
Preparing user '1112371010840510464'
Preparing user '24791743'
Preparing user '859226911490408453'
Preparing user '1011851685558476800'
Preparing u

Preparing user '1014218457506811904'
Preparing user '4047080903'
Preparing user '1463381107'
Preparing user '33112847'
Preparing user '4064163214'
Preparing user '2350393730'
Preparing user '378530969'
Preparing user '285428167'
Preparing user '1300910335'
Preparing user '1306689032722751503'
Preparing user '1281694075918442497'
Preparing user '881012873862033409'
Preparing user '788524'
Preparing user '1076986333'
Preparing user '2318222995'
Preparing user '1292605075'
Preparing user '27164756'
Preparing user '1322204248017301505'
Preparing user '3401129937'
Preparing user '1230289514876325891'
Preparing user '916527992238731264'
Preparing user '726003942597890048'
Preparing user '948740944614100992'
Preparing user '1314374430513004544'
Preparing user '1019325512425070592'
Preparing user '1112725572046610432'
Preparing user '1158120718288207873'
Preparing user '914915437728686080'
Preparing user '2554837140'
Preparing user '17228464'
Preparing user '1911989682'
Preparing user '9594283

Preparing user '284052760'
Preparing user '866607267990220800'
Preparing user '1642402514'
Preparing user '237986911'
Preparing user '4485994042'
Preparing user '3517880953'
Chunk complete. Waiting 15 minutes.
Preparing chunk. Num users: 300
Preparing user '1195348415431028738'
Preparing user '4909107696'
Preparing user '234974960'
Preparing user '347661208'
Preparing user '1296666174936113154'
Preparing user '3044828098'
Preparing user '330613684'
Preparing user '1028031355639218176'
Preparing user '1615454654'
Preparing user '705994443153920000'
Preparing user '1702348014'
Preparing user '483517190'
Preparing user '14171597'
Preparing user '24728591'
Preparing user '1773774002'
Preparing user '1042569900831789056'
Preparing user '1026991410573062144'
Preparing user '1248291297758232577'
Preparing user '18452400'
Preparing user '825517357695131649'
Preparing user '3053341664'
Preparing user '115149344'
Preparing user '1319661035368910849'
Preparing user '620690358'
Preparing user '399

Preparing user '4159909514'
Preparing user '1313188005385912320'
Preparing user '788155728443957249'
Preparing user '42366930'
Preparing user '2723504769'
Preparing user '65390037'
Preparing user '52638071'
Preparing user '1022846346506764288'
Preparing user '1265754836349472769'
Preparing user '1267105586040143874'
Preparing user '818271396010655745'
Preparing user '1277296390683725830'
Preparing user '229501164'
Preparing user '1235345590290182144'
Preparing user '540489050'
Preparing user '762328387'
Preparing user '195919468'
Preparing user '1018529353104175105'
Preparing user '1214026995060723712'
Preparing user '1319574196847104000'
Preparing user '3529462040'
Preparing user '87079705'
Preparing user '33451457'
Preparing user '118570392'
Preparing user '1030752618723901440'
Preparing user '1163168997984079872'
Preparing user '1304944064572329984'
Preparing user '243046596'
Preparing user '2238402254'
Preparing user '950365820'
Preparing user '1254425441378729985'
Preparing user '

Preparing user '2327404376'
Preparing user '2895313437'
Preparing user '2201552245'
Preparing user '101608330'
Preparing user '4153050136'
Preparing user '19241322'
Preparing user '2339891'
Preparing user '1199464397309009920'
Preparing user '44234992'
Preparing user '197957289'
Preparing user '3050402977'
Preparing user '758151466985066496'
Preparing user '937709427360014336'
Preparing user '986356435691991040'
Preparing user '3284181626'
Preparing user '1053409061734178816'
Preparing user '902448516'
Preparing user '825377278645379072'
Preparing user '16537818'
Preparing user '2732954837'
Preparing user '12000302'
Preparing user '1211417347702964224'
Preparing user '34093053'
Preparing user '1048287884678242304'
Preparing user '1308451745492799494'
Preparing user '1013929214'
Preparing user '2966457893'
Preparing user '198357693'
Preparing user '2808306433'
Preparing user '1266066300645847046'
Preparing user '1191643710'
Preparing user '1232732942524440578'
Preparing user '3548571075

Preparing user '1273172469541675008'
Preparing user '1468348021'
Preparing user '1253519716687081476'
Preparing user '2162628020'
Preparing user '974846751235428353'
Preparing user '979940192919400449'
Preparing user '138551940'
Preparing user '35889525'
Preparing user '787801631580618752'
Preparing user '18780322'
Preparing user '270020383'
Preparing user '21508174'
Preparing user '705077094674522112'
Preparing user '817443159928164353'
Preparing user '1614150008'
Preparing user '905954569'
Preparing user '918161578926792705'
Preparing user '1309598703196549120'
Preparing user '32281804'
Preparing user '703368447690940420'
Preparing user '42805100'
Preparing user '351876611'
Preparing user '1725407809'
Preparing user '1313134239882371079'
Preparing user '1236884148'
Preparing user '20541400'
Preparing user '1050230137478938624'
Preparing user '19986444'
Preparing user '69167813'
Preparing user '33594728'
Preparing user '1205354752499834880'
Preparing user '1073000680997171200'
Prepari

In [None]:
bot_probas

In [None]:
data['bot_proba'] = data['user_id'].map(bot_probas)
data['verifieds'] = data['user_id'].map(is_verified)
data.to_pickle("pickle/tw_proba_verif.pick")

In [None]:
verifieds

In [None]:
data['verified'] = data['user_id'].map(verifieds)
data['bot_proba'] = data['user_id'].map(bot_probas)
