# Imports

In [120]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import punkt
import nltk
from nltk import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import WordNetLemmatizer 

nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')
pd.set_option('display.max_columns', 1000)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/MichaelWirtz/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/MichaelWirtz/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/MichaelWirtz/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [121]:
data = pd.read_csv('tweets_2.csv')

# Checking out Data

In [122]:
data.head()

Unnamed: 0,id,keyword,location,text,target
0,0,ablaze,,"Communal violence in Bhainsa, Telangana. ""Ston...",1
1,1,ablaze,,Telangana: Section 144 has been imposed in Bha...,1
2,2,ablaze,New York City,Arsonist sets cars ablaze at dealership https:...,1
3,3,ablaze,"Morgantown, WV",Arsonist sets cars ablaze at dealership https:...,1
4,4,ablaze,,"""Lord Jesus, your love brings freedom and pard...",0


In [123]:
data.isnull().sum()

id             0
keyword        0
location    3418
text           0
target         0
dtype: int64

In [124]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11370 entries, 0 to 11369
Data columns (total 5 columns):
id          11370 non-null int64
keyword     11370 non-null object
location    7952 non-null object
text        11370 non-null object
target      11370 non-null int64
dtypes: int64(2), object(3)
memory usage: 444.3+ KB


In [125]:
# Checking for class imbalance
data[data.target == 1].sum()

id                                                  12506470
keyword    ablazeablazeablazeablazeablazeablazeablazeabla...
text       Communal violence in Bhainsa, Telangana. "Ston...
target                                                  2114
dtype: object

In [126]:
# dropping not used columns
data.drop(columns=['id','keyword','location'], inplace=True)

# Tokenizing Words

In [127]:
tokenizer = RegexpTokenizer(r'[a-zA-Z0-9!]+')

data.text = data.text.apply(lambda x: tokenizer.tokenize(x))

In [128]:
data.head()

Unnamed: 0,text,target
0,"[Communal, violence, in, Bhainsa, Telangana, S...",1
1,"[Telangana, Section, 144, has, been, imposed, ...",1
2,"[Arsonist, sets, cars, ablaze, at, dealership,...",1
3,"[Arsonist, sets, cars, ablaze, at, dealership,...",1
4,"[Lord, Jesus, your, love, brings, freedom, and...",0


# Lemmitization

In [129]:
lemmatizer = WordNetLemmatizer() 

def lemm_text(words):
    lem = []
    for word in words:
        lem.append(lemmatizer.lemmatize(word))
    return lem  

In [130]:
data.text = data.text.apply(lambda x: lemm_text(x))

In [131]:
# Reformat to singular strings per observation
data.text = data.text.apply(lambda x: ','.join(x))
data.text = data.text.apply(lambda x: x.replace(',',' '))

# Vectorizing

In [133]:
token = RegexpTokenizer(r'[a-zA-Z!]+')
cv = CountVectorizer(lowercase=True,stop_words='english',ngram_range = (1,2),tokenizer = token.tokenize)
text_counts= cv.fit_transform(data.text)


In [138]:
df = pd.DataFrame(text_counts.todense(),columns = cv.get_feature_names())
df.head()

Unnamed: 0,!,! !,! back!,! boys,! cutting,! democrats,! discovered,! drink,! emergency!,! explosion,! eyewitness,! f,! fabulous,! february,! felt,! fin,! floo,! fully,! g,! getting,! good,! gooood,! gt,! guess,! ha,! hayate!,! healthy,! http,! huge,! hypocrisy,! incred,! lets,! like,! lil,! love,! luz,! m,! mone,! naming,! need,! nellie,! non,! playing,! ps,! replace,! retweets,! rock,! s,! saying,! science,! seen,! severe,! smoke,! sold,! stop,! sure,! surprise,! teamea,! theuntamedboys,! think,! ugh,! understood,! vogue,! vr,! warm,! windstorm,! won,! work,! x,!!,!! !,!! alpha,!! bored,!! drowned,!! fan,!! footage,!! high,!! http,!! news,!! raising,!! sounds,!! tht,!! usa,!!!,!!! choking,!!! don,!!! happy,!!! poor,!!! resident,!!! youtube,!!!!,!!!! u,!!!!!!,!!!!!! januaryblues,!!!!!! war,!!!!!!!!,!!!!!!!! s,!!among,!!among minor,!go,!go bulls!,!woot!,!woot! totally,aa,aa d,aaaaaaaaacccccckkkkkkkk,aaaaaaaaacccccckkkkkkkk survived!!!!,aacqlz,aacqlz ogl,aadharcard,aadharcard ad,aah,aahcojuw,aahcojuw v,aaiqqqf,aaiqqqf yu,aajta,aajta zwgp,aalaathun,aalaathun balaa,aap,aap chronology,aaron,aaron literally,aaronjayjack,aaronjayjack severe,aaronjayjack tornado,aau,aaueutxhxw,aaufhpf,aaug,aaug ern,aayega,aayega delhi,ab,ab gtfy,ab http,ab kath,ab speak,aba,aba woman,abaf,abaf battle,abandon,abandon property,abandon stri,abandoned,abandoned ausopen,abandoned fortuna,abandoned mean,abandoned owner,abandoned parents,abandoned reality,abandoned refugee,abandoned street,abandoned stru,abandoned wrecka,abandoning,abandoning member,abated,abated late,abbey,abbey eye,abbot,abbots,abbots lane,abbotsford,abbotsford squirrely,abbott,abbott catholic,abbott fought,abbott r,abbott s,abbott served,abby,abby huntsman,abby looked,abby typhoon,abc,abc australianbushf,abc australianbushfires,abc australianbushfiresdisaster,abc battle,abc bee,abc desperate,abc fails,abc http,abc news,abc showing,abd,abdel,abdel el,abdicating,abdicating wa,abducted,abducted child,abducting,abducting large,abduction,abduction bannin,abductor,abductor hostage,abdullah,abdullah expressed,abeg,abeg add,abel,abel having,abertillery,abertillery http,abetting,abetting terrorist,abgpfxq,abidin,abidin http,abiding,abiding c,abiding citizen,abiding p,ability,ability abusive,ability know,ability push,ability restrain,ability scan,abj,abj bus,abject,abject worshiper,ablanari,ablanari mohinimondol,ablaze,ablaze close,ablaze council,ablaze dealership,ablaze house,ablaze http,ablaze independent,ablaze l,ablaze land,ablaze latest,ablaze man,ablaze ndu,ablaze ngemsibaa,ablaze night,ablaze okada,ablaze p,ablaze product,ablaze redjanuary,ablaze snl,ablaze t,ablaze thing,ablaze total,ablaze wishing,able,able advanced,able album,able awareness,able bloody,able connect,able electrocute,able g,able grow,able hazard,able help,able http,able image,able just,able lecture,able physically,able reach,able rescue,able shit,able think,able totally,able touch,abnormal,abnormal normality,abnormal radiation,abnormally,abnormally ambient,abo,abo http,aboard,aboard bus,aboard http,aboard plane,aboard r,abomination,abomination aga,abomination desolation,abor,aborbitadi,aboriginal,aboriginal planner,aboriginal suicide,abortion,abortion constitutional,abortion martyrdom,abortion murder,abortion true,abortion womb,abou,abou wa,abounds,abounds air,above!,above! guess,aboveignorance,aboveignorance http,abovton,abovton o,abraham,abraham jacob,abrar,abrar took,abroad,abroad vikas,abruptly,abruptly break,abs,abs cbn,absence,absence court,absence stretcher,absentee,absentee landowner,abso,abso frickin,absol,absol http,absolute,absolute dickhead,absolute disaster,absolute emotional,absolute fave,absolute mayhem,absolute meltdown,absolute movie,absolute nonsense,absolute reason,absolute sincerity,absolute weapon,absolutely,absolutely agree,absolutely annihilated,absolutely blew,absolutely bonker,absolutely car,absolutely comical,absolutely correct,absolutely crushed,absolutely damage,absolutely delighted,absolutely demolish,absolutely demolished,absolutely doe,absolutely effect,absolutely exasperating,absolutely f,absolutely flattened,absolutely fucking,absolutely haunted,absolutely heartbreaking,absolutely inundated,absolutely killed,absolutely killing,absolutely loved,absolutely loving,absolutely obliterate,absolutely priceless,absolutely reas,absolutely right,absolutely ripping,absolutely s,absolutely shitting,absolutely spewing,absolutely think,absolutely tragic,absolutely wonderful,absolutely world,absolutely wreck,absolutely wrecked,absolutely youtuber,absolutely!,absolutely! uss,absorb,absorb heat,absorb long,absorption,absorption center,abstain,abstain westminster,abstained,abstained drawing,abstract,abstract pattern,abstract!,abstract! fault,absurd,absurd hyperprecise,absurd unreasonable,abt,abt debut,abt guy,abt http,abt mudslide,abt spreaded,abt taal,abt troll,abt u,abtm,abu,abu ammar,abu amrah,abu http,abu saber,abuja,abuja furniture,abuja rea,abuja th,abuja won,abukuma,abukuma river,abuse,abuse allegation,abuse amp,abuse authority,abuse buried,abuse command,abuse dog,abuse emergency,abuse ha,abuse human,abuse info,abuse nervous,abuse patient,abuse problem,abuse ruban,abuse scandal,abuse usually,abuse ve,abused,abused harlow,abused people,abused power,abused ppl,abuser,abuser peter,abusing,abusing http,abusive,abusive digital,abusive http,abusive maybe,abvp,abvp cadre,abvp female,abvp questioned,abwo,abwo hxuz,abx,abx jisc,abysmal,abysmal handing,abzdkqocpv,ac,ac http,ac rvot,aca,aca wa,acacia,acacia ampliceps,academia,academia time,academy,academy agricultural,academy award,academy completely,academy dead,academy gave,academy hyderabad,acbpjurik,acc,acc follow,acc http,acc soondingies,acc yxp,acce,accelerate,accelerate http,accelerate t,accelerates,accelerates http,accent,accent mean,accents,accents worked,accept,accept bungee,...,zepp lzpna,zero,zero casualty,zero chance,zero dark,zero degree,zero iraqi,zero nearly,zero repercussion,zero weapon,zeromdr,zeromdr issue,zeroplasticrun,zeroplasticrun iamrespons,zeroranger,zeroranger don,zeroranger super,zetsumerise,zetsumerise key,zettajoules,zettajoules energy,zeus,zeus skin,zevon,zevon id,zeyeyp,zf,zfhawgl,zfj,zfj shjggo,zfrdb,zfrdb aoql,zfsa,zfy,zfy h,zfzcmltm,zfzcmltm java,zg,zg bv,zg zu,zgeggatd,zgeggatd l,zgh,zgj,zgk,zgk product,zgqmkqbhid,zgs,zgs jnm,zgt,zgt fz,zgucw,zguyr,zguyr zitd,zh,zh sqsjae,zha,zha maa,zhan,zhan managed,zhan trouble,zhmf,zhmf mscn,zhmoop,zhmoop fuf,zhnazpwspf,zhob,zhob road,zhptsiyinb,zhqaxd,zhqaxd l,zhudds,zhudds f,zhuhai,zhuhai south,zhuhai today,zhvvfox,zhwbjypf,zhwbjypf b,zhwspy,zhyxjfw,zhyxjfw jv,zhzxlo,zi,zi ux,ziacegt,ziacegt oa,zico,zico posted,zicpvc,zicpvc m,ziegler,ziegler hundreds,zifmahfpzu,zifmahfpzu http,zifynfv,ziisvy,ziisvy eab,zijin,zijin shan,zimbabwe,zimbabwe concern,zimbabwe park,zimbabwe run,zimbabwe s,zimbabweans,zimbabweans hostage,zimbabweans realised,zina,zina ha,zionism,zionism lo,zionist,zionist owned,zionist university,zionists,zionists feel,zip,zip bts,zip code,zip drop,zip dropped,zip photo,zip photo!,zip shut,zitd,zixb,zixb cm,zj,zj esbmg,zjameqhqv,zjcdcdo,zjcdcdo cd,zjesibazbq,zjfn,zjfn uue,zjg,zjg lka,zjiob,zjjwb,zjjwb imi,zjmvtw,zjpv,zjpv scquy,zk,zk nj,zka,zka lg,zkd,zkeixqva,zkiyilmkob,zkjdpyzim,zkngkgfneo,zkp,zkqcnuchzn,zkwrmvmg,zl,zl hwm,zl mduoe,zldcwcx,zldl,zldl jszyv,zlg,zli,zli tunjhk,zlifvmcfhb,zlldzjc,zlldzjc od,zlngncclo,zlocwxi,zlwmep,zlwmep o,zlxdahfewr,zlxdahfewr http,zlxrv,zm,zma,zma tvao,zmbsga,zmbsga xrx,zmezn,zmezn kcqc,zmmy,zmofif,zmofif e,zmpvktxdxg,zmrjzptus,zmuju,zmuju wit,zmvfab,zmvor,zmvor rhzl,zn,zn f,zn r,zn vpwoxfs,znb,znb j,znbklocz,zncf,zncf z,znmdskv,znmdskv q,znoy,znqzthlxx,znrbucpt,znrbucpt n,znv,znv q,znxc,zo,zo itu,zo nopjprh,zoafcphpcp,zoepddpgsm,zojbdth,zojbdth mw,zom,zombie,zombie apocalypse,zombie company,zombie en,zombie inundatio,zombie run,zombie survival,zombie time,zombies,zombies hopefully,zombievirus,zombievirus http,zonal,zonal cooling,zone,zone ambazonia,zone aren,zone beta,zone cont,zone cybil,zone eve,zone family,zone great,zone hd,zone heights!,zone http,zone like,zone man,zone men,zone mental,zone multiple,zone piccadilly,zone prod,zone refugee,zone s,zone safe,zone sailor,zone teems,zone thebachelorabc,zone threatened,zone tin,zone toxic,zone wa,zone war,zone!!!,zone!!! mean,zone!!!!,zone!!!! head,zoo,zoo gorgeous,zoo nsw,zoom,zoom took,zoom video,zoomed,zoomed version,zoq,zorro,zorro s,zos,zos deserves,zoth,zoth update,zoxsmqscwk,zoy,zoy lj,zoya,zoya s,zp,zp b,zp f,zphdl,zpkbzmavzq,zpl,zpmpuqqtx,zpnjecaqnc,zpo,zpo jagq,zppufp,zppufp wp,zpqgiv,zpqgiv p,zq,zq hey,zq hqr,zq izvbs,zqg,zqg snrd,zqitmgbwg,zqitmgbwg http,zqlz,zqlz ex,zqnatxh,zqnatxh wh,zqqlr,zqrdapd,zqwxyjsgv,zqygduk,zqyzupqyo,zr,zr n,zra,zra zp,zrhdq,zrjrynq,zrjrynq o,zrooahsh,zrtelex,zrtelex kr,zs,zs hgn,zs hxeisqn,zs qjtdg,zsd,zsd rgual,zseuahyto,zseuahyto http,zsgx,zsgx mc,zsh,zsiwq,zsiwq pjvk,zsnl,zspjoiweyi,zspmw,zsqa,zsqa e,zswliapenb,zswliapenb http,zswt,zswt s,zsz,zsz g,zszww,zszww ac,zt,zt xb,zt y,ztduku,ztfxb,ztjlf,ztknwocps,ztn,ztrhfejg,zttshwok,ztyb,zu,zu hsi,zu mf,zuckerberg,zuckerberg http,zuckerberg view,zudj,zudj ag,zuh,zuh s,zuin,zuio,zuio fs,zulaikha,zulaikha s,zularoqepo,zulaykha,zulaykha s,zulxmqwlbi,zuma,zuma did,zuqjgmg,zuqjgmg c,zurich,zurich http,zusn,zusn lgq,zuv,zuv gt,zuyirpzaow,zv,zv slj,zv x,zvaqhpeb,zvaqhpeb o,zvb,zvb quokn,zvbylohw,zvfpimhopu,zvh,zvh kuvwp,zvjtisyurk,zvkyjbjfo,zvlg,zvlg h,zvrwavc,zvs,zvs lmohp,zvyojqtilk,zvzzfrhs,zw,zw hostage,zw uvxn,zwcgsh,zwcgsh cpx,zwgp,zwgwg,zwgwg mc,zwh,zwhxv,zwhxv j,zwj,zwlo,zwlo e,zx,zx rwhc,zx ttj,zxegq,zxegq fen,zxgk,zxhqqpc,zxl,zxl e,zxl y,zxmvbzgchb,zxn,zxn avdywv,zxnrgutvl,zxrsml,zxrsml epi,zxsapdkaia,zxsfl,zxsfl n,zxsz,zxsz e,zxvy,zxvy denti,zxya,zy,zy agqe,zy http,zy ir,zy y,zygtelqf,zygvc,zyix,zyix oyj,zykgai,zyl,zyqo,zyqo dgpan,zyrvbs,zysodowknh,zysodowknh http,zytlhun,zytlhun mz,zyytfvyps,zyytfvyps pathetic,zz,zz cannabis,zz ciqo,zz jlo,zz k,zz r,zz xedcf,zzasvqaojf,zzggqn,zzggqn ar,zzglojct,zziomtdnpj,zzip,zziqsrqevh,zzkhkxcft,zzqzn,zzqzn idgt,zzrdk,zzttiu,zzttiu z,zztybwygj,zzvvsa,zzvvsa gf,zzwh,zzwh b,zzx,zzx mdi,zzzmp
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [147]:
# renaming columns to avoid join overlap
data.columns = ['text_original','target_classification']

In [149]:
data2 = data.join(df, how='outer')

In [152]:
data2.drop(columns='text_original', inplace=True)