In [94]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
from urllib.parse import urlparse
import string
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import nltk
import re

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option("display.max_colwidth", None)

# Datasets joining and reading

In [95]:
starting_points = {'parsed\\parsed_tweets_gossipcop_fake.csv': 'gossipcop-699303448',
                    'parsed\\parsed_tweets_gossipcop_real.csv': 'gossipcop-881670',
                    'parsed\\parsed_tweets_politifact_fake.csv': 'politifact15135',
                    'parsed\\parsed_tweets_politifact_real.csv': 'politifact954'}

parsed_files_paths = list(starting_points.keys())

dfs_dict = {}

columns = ['news_id','tweet_id','text','label']

for parsed_file_path in parsed_files_paths:
    # for tweets df

    words = parsed_file_path.split('\\')[1].split('.')[0].split('_')
    words.pop(0)
    df_name_tweets = '_'.join(words)

    dfs_dict[df_name_tweets] = pd.read_csv(parsed_file_path,header=None, names=columns,encoding='utf-8')

    # for news_df

    words.pop(0)
    df_name_news = '_'.join(words)

    file_path_news = f'datasets/FakeNewsNet/{df_name_news}.csv'

    dfs_dict[df_name_news] = pd.read_csv(file_path_news,encoding='utf-8')

In [96]:
dfs_dict.keys()

dict_keys(['tweets_gossipcop_fake', 'gossipcop_fake', 'tweets_gossipcop_real', 'gossipcop_real', 'tweets_politifact_fake', 'politifact_fake', 'tweets_politifact_real', 'politifact_real'])

In [97]:
dfs_dict['politifact_real'].head()

Unnamed: 0,id,news_url,title,tweet_ids
0,politifact14984,http://www.nfib-sbet.org/,National Federation of Independent Business,967132259869487105\t967164368768196609\t967215618687512581\t967386148468416513\t968234223189610498\t968238354599940096\t969607331330908161\t969607746898153473\t969702389421826049\t969752298225356800\t969752332220190721\t969985181695213568\t971015530822893568\t971215479015555072\t971430739970592768\t971626584120717313\t971647044992385024\t971815808916770816\t971815809260707841\t971894375075762176\t972472163088371713\t972521429379010561\t972521700033286145\t972681763175784448\t972805595077251072\t972870143184338945\t972924479826485250\t973363426071216130\t973536614898110464\t973568735658364928\t973574600578695170\t973574645956870144\t973930299045306368\t974104473168695296\t974110437213171712\t974309878926905345\t974309878863888389\t974408326414065667\t974410428171280384\t976262632725516288\t976498012246740992\t976498012242563072\t976562022262431744\t978305801944154112\t978364325453643776\t978374010751803392\t978644105281581056\t978667092194013184\t978667458260357120\t979004774707167234\t979367042972356608\t979367399546937344\t979422491893534720\t979818322651787264\t980247411259002882\t981229682292133889\t981271590884331520\t981528097391595521\t981963408013602816\t981966925809909760\t981967796815839232\t981978858579607554\t982038748979490816\t982078841895227394\t982233981289574401\t982267063514185735\t982367364992983041\t982394410380902400\t982511837429354496\t982634884110995456\t983665418635112448\t984052456735805440\t984119550026412038\t984862602533597184\t985001084858064896\t985941222731722752\t986305073285292033\t986696357112000512\t986995378443583490\t987028780257554433\t987350383352860673\t987635022600421376\t987698268942499842\t988556238630850560\t989170236313227265\t991064333391187975\t991308112794710016\t991675253108674561\t991687816596934656\t991689007166840834\t991689624446791680\t991779934904369152\t993291436077568000\t993596536863969280\t993884409530077184\t993914004119711744\t994076514709893120\t994087205869105155\t994110127132303362\t994393622262018049\t994568381541093376\t994611795917791232\t994634826341896193\t994658283565801473\t994986254805630976\t995017768327176193\t995048600462811136\t995091181687062529\t995287485507829760\t996029144470310913\t996060270244999168\t996184590367756288\t996449475148288000\t996456813657604096\t996488822094315520\t996802269277577220\t996837165559664642\t996875145556905984\t997006499040854016\t997287215737651200\t997287219688751105\t997314102551756801\t997416534497783808\t997520943655411718\t997567527508160513\t997945017493786624\t998195222554267649\t998273391407878149\t998579444703952896\t998938114231070720\t998970805282377729\t1000035773364584449\t1001513675289743360\t1002253662037729281\t1002284737107562496\t1002591360451522562\t1002632207570210816\t1002993825336430594\t1003309065236353026\t1003693424514600960\t1003709005926223873\t1003830134749753344\t1004000891630997504\t1004001180022796288\t1004086728586596352\t1004153605031538689\t1004760149234475010\t1004870094432952321\t1005035723655991296\t1005160020848373760\t1005162993133465602\t1005168844871229440\t1005696782997827584\t1005831723156897792\t1006226725913624587\t1006260177040039936\t1006477702113587200\t1006477982351970305\t1006481455147462656\t1006500717404872704\t1006501362836918272\t1006512159868772352\t1006574043527548931\t1006657080869212160\t1006703207467442178\t1006927013930831872\t1006970454450589699\t1007024299880960003\t1007401806534729728\t1007641625462534146\t1007731754474631169\t1007731773898477568\t1007737537698418690\t1008044220488171520\t1008045821416206336\t1008265526148075521\t1008513569451859969\t1009044337106145282\t1009100089644797952\t1009102563952775168\t1009103238111682560\t1009109125085589504\t1009109131377152000\t1009109186758828033\t1009110709140180992\t1009111129757401088\t1009111311593041921\t1009111323941142528\t1009111885579337728\t1009113330215202818\t1009113404789985280\t1009113499216302080\t1009113498603880448\t1009113602903531521\t1009113630682550272\t1009113749678985220\t1009113778485637125\t1009113776690360320\t1009113788447166468\t1009113844399198209\t1009113846643154944\t1009113858278150144\t1009113858127159296\t1009113864246562818\t1009113878905618432\t1009113940717076481\t1009114012934696961\t1009114047105716225\t1009114098129362944\t1009114111559585792\t1009114169390583808\t1009114404716187653\t1009114409770373121\t1009114417521455110\t1009114486698135553\t1009114696417505282\t1009114758702772225\t1009115083685990400\t1009115353031561216\t1009115441250357249\t1009115638814724097\t1009115986698670080\t1009116127379636224\t1009116283902746625\t1009116503654981632\t1009116836494004224\t1009117007298646016\t1009117042572693504\t1009117247145639937\t1009117587865817089\t1009117987129847808\t1009118090817392640\t1009118168198057984\t1009118223403450368\t1009118447748280320\t1009118602803470336\t1009118652937986050\t1009118793979891712\t1009118815530246145\t1009118818680139781\t1009118862405722115\t1009118963886841856\t1009118975622463488\t1009119323799969792\t1009119464229519360\t1009119775816077315\t1009119918497976320\t1009119920070840321\t1009120155916521473\t1009120169132605440\t1009120211465834497\t1009120230138957825\t1009120305741103104\t1009120370975051776\t1009120420572774401\t1009120453280063488\t1009120547219869696\t1009120627851255809\t1009120690753110017\t1009120756503019522\t1009120812459347968\t1009120816867364864\t1009120984920739840\t1009121022321176576\t1009121028843474945\t1009121046103035907\t1009121277636947969\t1009121589470756864\t1009121892463095811\t1009121919457734656\t1009121941066801152\t1009122159153709056\t1009122298652188673\t1009122328901451777\t1009122410136649728\t1009122854502371329\t1009123522348630016\t1009124007470174210\t1009124077251059714\t1009124596421885957\t1009124725099040768\t1009124776785469448\t1009124845488046080\t1009125212904984577\t1009125242004910080\t1009125316449595393\t1009125475103444992\t1009125821490040832\t1009126276471324672\t1009126333559947264\t1009126354338615297\t1009126451491287045\t1009126769515888640\t1009126825145061376\t1009126888122568704\t1009127005789421568\t1009127769341595650\t1009127926476910592\t1009128011239772163\t1009128488962609152\t1009128536844722176\t1009128665714749440\t1009129003515613184\t1009129095215665155\t1009129204686913539\t1009129418336415746\t1009129480655392768\t1009130378819297282\t1009131124121980928\t1009132160417173505\t1009132207129202689\t1009132510670938112\t1009133811718938625\t1009133979260391424\t1009134346442141696\t1009134348681994241\t1009134714656120832\t1009134916267696128\t1009135757427081217\t1009136099539574784\t1009136121060528128\t1009136326258524160\t1009136599697973250\t1009136986173554689\t1009137467885309952\t1009137699700297729\t1009137972543967232\t1009138175141384192\t1009138843524595712\t1009139553330855937\t1009139807253958656\t1009140029531099136\t1009140608617930752\t1009141758268190720\t1009142133079576576\t1009142498588053506\t1009143143864963073\t1009143615950610436\t1009144098782175232\t1009144539867729922\t1009145639542902787\t1009146112165515266\t1009146127583719424\t1009146521051369473\t1009146957842051072\t1009148128317067264\t1009148246651031553\t1009148729759227904\t1009149033338888193\t1009150116295532544\t1009151057899999233\t1009151793102610432\t1009151882982580224\t1009153786907185153\t1009154069158604803\t1009154118102016000\t1009154483681742851\t1009155887779500039\t1009156073041866754\t1009156127395700736\t1009156148581158916\t1009156881703583744\t1009157019712946176\t1009157091091779585\t1009157566608355328\t1009157647843692544\t1009158300577992705\t1009158463132504065\t1009158557642547200\t1009160148827365376\t1009160154774818816\t1009160743332216832\t1009160926740639747\t1009162199074508801\t1009162374425841664\t1009164544550174720\t1009164935216197632\t1009165270810873862\t1009165782033555456\t1009168464777318400\t1009169227775987712\t1009169515056377857\t1009170173054586885\t1009171518838923264\t1009172468685246469\t1009172483071709184\t1009173096501317633\t1009173584391131136\t1009174036566462466\t1009174794942779393\t1009175044528865280\t1009175776980324352\t1009176464745476096\t1009179012118147072\t1009180793007169538\t1009180835801714688\t1009180911185924099\t1009181117205839878\t1009181441375129600\t1009181902044127232\t1009182059594592256\t1009182758688739336\t1009184903961903104\t1009184918906331143\t1009185534965637120\t1009188211522985990\t1009189185331212288\t1009190029426642946\t1009190657343270912\t1009190729636372482\t1009191545793228800\t1009191780934389760\t1009191967786455041\t1009193284823068672\t1009194112233410561\t1009194219137851393\t1009194926762967042\t1009194956773191680\t1009195594433343490\t1009196255329714176\t1009198494874140672\t1009199507081498627\t1009199745817038849\t1009200607805108224\t1009201598915072001\t1009202620706295808\t1009203550495395840\t1009205809899712512\t1009206484885016578\t1009206484088082432\t1009206495970496512\t1009206522067472384\t1009206585405722625\t1009206868533743616\t1009206889048047616\t1009206974565711874\t1009207031612534784\t1009207031604174848\t1009207252069371904\t1009207273909047296\t1009207292737282048\t1009207304909139968\t1009207353068195840\t1009207485654294528\t1009207560090587136\t1009209171953188866\t1009209472294752256\t1009209498806784000\t1009214068077580291\t1009214619213291521\t1009215135049641984\t1009217000667836416\t1009218542640685057\t1009220248115875842\t1009223205171671040\t1009224566198202370\t1009227237034545154\t1009231938853527552\t1009232086350356481\t1009234323143262210\t1009235016105119744\t1009236247397707776\t1009241029768249344\t1009241500885159936\t1009241627305508864\t1009241877097242624\t1009252339599990784\t1009252801887776769\t1009255505871507456\t1009264380490539008\t1009267510103851008\t1009280059763388416\t1009281598112583680\t1009286216116768769\t1009288642567663616\t1009290002684137477\t1009295278715727873\t1009295399306194944\t1009295494487523329\t1009295579363459072\t1009297399351611392\t1009298286862327808\t1009300060457271296\t1009300058293112832\t1009303492085866497\t1009306256987901952\t1009307254762418176\t1009308645891469312\t1009319974295531520\t1009321011479605248\t1009323911614361601\t1009326044409712642\t1009329895737085953\t1009330239804264448\t1009331165105410048\t1009334083686748160\t1009339277069742080\t1009340433506455554\t1009342035344678912\t1009343165617377280\t1009347230548025344\t1009347432302501888\t1009347807461830656\t1009350893014155266\t1009352430289596417\t1009352848117780481\t1009364323444109314\t1009369224412884993\t1009371522618810369\t1009382450202492929\t1009384444883623936\t1009384963332923399\t1009385370226774016\t1009400520568705024\t1009407067860422657\t1009409640092831744\t1009412002874355713\t1009419349961166848\t1009421915965018112\t1009424003759370241\t1009425940617875460\t1009426611274428416\t1009428560766013440\t1009433140194971648\t1009435713950175233\t1009437976542081024\t1009443707052003328\t1009451739534233601\t1009460567143313408\t1009464951709360130\t1009484742637555713\t1009517943108128770\t1009523079511445504\t1009540615120539659\t1009541731942100992\t1009551988055527424\t1009552867605139456\t1009583176606511105\t1009583521260883968\t1009592787648512001\t1009604629905248256\t1009626886975229952\t1009667426802270209\t1009670907625918464\t1009698936184016896\t1009787234566975489\t1009815032866246656\t1009890052372123648\t1009937622884466689\t1009956260995878912\t1009986336089141248\t1010146901176418304\t1010150457677877253\t1010173784769400832\t1010182499547406337\t1010182509093621761\t1010188741812498433\t1010583298731433985\t1010624880319004672\t1010706578213728256\t1010710255297183744\t1010743928822423552\t1010747319728463872\t1010749667162902528\t1010771498880020480\t1010776147150372865\t1010822420767363073\t1011032933749096448\t1011286815578443777\t1011326425654317058\t1011351283977027586\t1011449657250209793\t1011549112364253184\t1011642697323634690\t1011685697009410048\t1012019895406645249\t1012028841030586369\t1012044747345682432\t1012138471706759169\t1012142197150322688\t1012368962213109761\t1012374054022590467\t1012499738191679488\t1013171347411914753\t1013829907913236480\t1014187393468043264\t1014228753617125376\t1014633816290447360\t1015234056269725696\t1015296876696858625\t1015652216328187904\t1015686927058546688\t1015923874507214848\t1016098991577096192\t1016250629964980224\t1016344987431243776\t1016359933523255296\t1016443892693073926\t1016642833564958720\t1016685967963512834\t1016744969334394880\t1016776110200238080\t1016781715904323584\t1017025453977210885\t1017070259361144832\t1017158304890609664\t1017396155427278848\t1017424098446725120\t1017448809725612033\t1017503174012342273\t1017821830365700096\t1018130962151084032\t1018228616214339584\t1018289829451313152\t1018527286268780544\t1018659153064157184\t1018859816771575808\t1018873937520930817\t1018876481529896961\t1018887052518199303\t1018926243599802368\t1018946701548249088\t1018989397331636224\t1019050316115791872\t1019430374999449600\t1019592859383685120\t1019596231138476032\t1019710236247183360\t1019854580967006208\t1019936492506951680\t1019950132169707520\t1020011840326152193\t1020012050049851392\t1020012606780628992\t1020016158836109313\t1020046135820353536\t1020073686659870720\t1020096539438583815\t1020102760249753603\t1020104427401719810\t1020104967296815109\t1020110904933822464\t1020110997305032712\t1020111522716979200\t1020112422022123521\t1020114848611487751\t1020119032694038528\t1020119569971720194\t1020127161154404354\t1020128165660831744\t1020129741066530816\t1020135275920412672\t1020139655730290688\t1020143441685241858\t1020143563961700353\t1020144440793608192\t1020145972939173888\t1020147881209102337\t1020152035273035777\t1020152557140955136\t1020157911870332928\t1020158474938998784\t1020160966984470528\t1020165836370206720\t1020173580129251334\t1020178615701344256\t1020196821497360385\t1020217683323826176\t1020218309646733313\t1020218667215269888\t1020219958872158215\t1020242665642786816\t1020247733091921920\t1020253141982482434\t1020260251189342208\t1020264132979961857\t1020275534419447810\t1020278517542588416\t1020279573324476416\t1020290751861846021\t1020292778209705984\t1020298127327354881\t1020298455800074243\t1020302240832159745\t1020303747921170433\t1020304438181998592\t1020304979062648832\t1020307404947443714\t1020307618311671808\t1020316177241985024\t1020318183360802817\t1020319263943520256\t1020320910480420866\t1020327490756251648\t1020336539845496832\t1020346837394477057\t1020347137639501825\t1020348941777293313\t1020351178910195712\t1020370763914915846\t1020377259377594368\t1020392977464733697\t1020393614852198400\t1020460403522973697\t1020462469767221248\t1020467025125216261\t1020478375872417792\t1020478401361035264\t1020574329363410949\t1020624667290292224\t1020639345970171904\t1020641602149249024\t1020655640413442048\t1020659879349014528\t1020701638393319424\t1020705467574759424\t1020722927057219584\t1020727563289858048\t1020787504008527872\t1020793159545020416\t1020796374705627136\t1020812110102781959\t1020840080439685120\t1020889113283579904\t1020916040195244033\t1020921152104017920\t1021000246887944193\t1021000293054668800\t1021021538777214981\t1021024603660005376\t1021025873582084097\t1021027106342547457\t1021031561494433792\t1021035676324724741\t1021049454944608256\t1021080258965934080\t1021092550264999936\t1021127295673733120\t1021143081909809152\t1021147916868702208\t1021168196899016705\t1021188055061942274\t1021223362893418497\t1021479992738373634\t1021592597012180992\t1021747842736476160\t1021817162359562240\t1021908432373182464\t1021914904561967110\t1022201012399038464\t1022472631122055168\t1022485364223746048\t1022501560935481353\t1022655958600871937\t1022817983809220615\t1022998648739659776\t1023634838044200960\t1023962359709093889\t1024003633699061765\t1024376913920491521\t1024693207291101184\t1024721449712594949\t1024729286908014593\t1024999905650241538\t1025061131520548864\t1025214614248808449\t1025472279831031808\t1025480179693637634\t1026193490903027713\t1026255521685352448\t1026423634880024577\t1026448593639669761\t1026524885538013184\t1026542926430494722\t1026549716589281280\t1026808345867505664\t1027224848718692352\t1027605431110262784\t1028365947889807360\t1029072127415263234\t1029232164708278274\t1029245619037065216\t1029319196818255873\t1029336080414171136\t1029367409705209856\t1029392446562742273\t1029401734500757506\t1029414267857104901\t1029414550758715394\t1029445314233561088\t1029478471301562369\t1029479037188681734\t1029479612236144642\t1029633282944712705\t1029660994258259969\t1029685893123239936\t1029686270828732416\t1029715381857001472\t1029735182578987008\t1029735381888118784\t1029775337964675072\t1029777694647496704\t1029779174150610945\t1029799060432019456\t1029834401654419456\t1029908720321523712\t1029912864491032576\t1029913384895045632\t1029949134084759552\t1030941681246334976\t1031556723411173376\t1031580875635544067\t1031646093875048448\t1031903753471807488\t1032301108235051008\t1032425573057024000\t1032733051967758336\t1032751535279800320\t1034032779133112320\t1034822187339919360\t1035151945571487744\t1035269786580803584\t1035598334608592896\t1035655648392998912\t1035916505836183552\t1036049339141971970\t1036427080270073857\t1037008318970187776\t1037011490031710208\t1037102366800179205\t1037419211268583424\t1037430400090439680\t1037468928409305094\t1037689484731666433\t1037720791809286144\t1037749740119764992\t1038440491442823170\t1038493943476961281\t1038588865559318529\t1038950009872572416\t1038967068853448710\t1038969022564450304\t1039278617992355840\t1039279499731296256\t1039442609909981184\t1039443628484161536\t1039444321534189568\t1039458089056002048\t1039491758139297792\t1039491979758002177\t1039494683179737090\t1039519706699296770\t1039525704273158144\t1039527822610522113\t1039535960239878144\t1039555777747460101\t1039558760761581569\t1039558824934301701\t1039559750088880129\t1039565054918762498\t1039566979655393280\t1039569464877035520\t1039590232402944002\t1039590320164483077\t1039607906218520579\t1039609809123848192\t1039646179347165186\t1039649948634112001\t1039655434926391296\t1039657220894470146\t1039662833351512064\t1039664933590446081\t1039691205775376387\t1039850925597179904\t1039873919828873216\t1039892258898825216\t1039893245839339520\t1039908116442230784\t1039921798756937728\t1039942823284535296\t1040001007739195394\t1040016371172683782\t1040036649982353408\t1040077954657673216\t1040207960167702528\t1040223626098892800\t1040239831014678529\t1040241950195101701\t1040265198345576450\t1040268102037106689\t1040286782598864896\t1040374791923003392\t1040537862800859136\t1040544451763036165\t1040609524414001152\t1040721936572137472\t1040919726761947138\t1041310849317646337\t1041334536968142849\t1041492152788938754\t1041734230768844800\t1041747111946190848\t1041783957967265793\t1042122469400666112\t1042132005364883456\t1042140619865567235\t1042580556276527104\t1042586319682252800\t1042745470521667584\t1042793370865872896\t1042799312302997504\t1042808655723343872\t1042808992450392064\t1042811716940312577\t1042835980015808512\t1042951715182596096\t1042965519140311041\t1042967210061455365\t1043122464992178178\t1043133683794956288\t1043134368292712448\t1043134655598342149\t1043138082680307712\t1043177536056909824\t1043222866186129408\t1043292714136162304\t1043591599098421249\t1043998743681544193\t1044295265166020609\t1044302263228006400\t1044302827739377664\t1044633076142428165\t1044656419994312709\t1044684762189897732\t1044937439628939265\t1044977097486356480\t1045016980498141189\t1045017267099107328\t1045019480546562048\t1045035032245661696\t1045059817637924864\t1045060120525312001\t1045099628239171586\t1045201414673379328\t1045332312207097857\t1045338592850186240\t1045391742336466945\t1045482680274563073\t1045714630256078849\t1045729054576529408\t1045743319723065344\t1045776844027559936\t1045778400990941184\t1045779797069221889\t1045939742641139712\t1046414369226215424\t1046779884067082240\t1046785074069426176\t1046794392512675841\t1046842229015425024\t1046881703023636481\t1047129206281388034\t1047148862849331206\t1047177423291396096\t1047252170729508864\t1047461541061300224\t1047496978945515520\t1047622512534671361\t1047633660307013632\t1047885814649245696\t1047895961027203080\t1047947272078721024\t1048299309442072578\t1048373540230189056\t1048573156380856321\t1048613431492919302\t1049019027161534464\t1049085508557721600\t1049308450323226625\t1049361691157430273\t1049467374154473473\t1049600652933033984\t1049609369678110720\t1049616563215757312\t1049618061630812160\t1049695686692880387\t1049710831984959489\t1049758994187898885\t1050045584034299904\t1050067363725361153\t1050288687282577408\t1050506684337201157\t1050510122760511494\t1051100316521955329\t1051505186219290626\t1051614334642728960\t1051706075345510400\t1051805239752478720\t1051848013243539456\t1052344360661118977\t1052372506613620736\t1052378836564893697\t1052553134122381312\t1052562069957107713\t1052904859383533568\t1052937295437946881\t1052978134511996929\t1052982428380393472\t1053002248081403904\t1053039294384721920\t1053039449129410560\t1053039448106008576\t1053039447179030528\t1053039450060599296\t1053045606896361479\t1053092122616836096\t1053227308247629824\t1053283823365095424\t1053450356590632961\t1053714717385068544\t1053715357658157059\t1053717486145806336\t1053717861028499457\t1053758757417684995\t1053767275277377542\t1053906707158568960\t1053925638258806785\t1054208372407496706\t1054236643782090752\t1054477808511213568\t1054517417102069760\t1054618567067475968\t1054752585947734016\t1054824395674451973\t1054832304441430017\t1054840449335205889\t1054843656337850369\t1054911542238633984\t1055079411693821952\t1055083007021846528\t1055287527232299008\t1055463720154001408\t1055967583927554049\t1056183913222156289\t1056238801469743104\t1056974312740139008\t1057036105927790592\t1057039816095870977\t1057315381226942464\t1057402295913201669\t1057622862419386374\t1057757890444976130\t1057959247227559936\t1058029904313401344\t1058050216350400514\t1058086329207214080\t1058292494507696128\t1058432169104863232\t1058532882241806336\t1058753199082012672\t1058773044209049601\t1059072696783298560\t1059156964608016384\t1059509997070176257\t1059604688134397952\t1059829761437036545\t1059966140858974208\t1060258574469869570\t1060707555415154689\t1061488357560197121\t1062254543826763776\t1062462236424265728\t1062553693994782720\t1062617263528919040\t1063130361448800256\t1063464396591153153\t1064393916013846528\t1064537991081799680\t1064668683698663424\t1064790732324638721\t1064877327979593728\t1065041577888894976\t1065308995060146177\t1065413524166463490\t1065549844436197376\t1065824633667108864\t1066278811082416128\t1066409113976885250\t1066466855781613569\t1066531399597838336\t1066695664044834816\t1066814112762605569\t1066833223387246594\t1066986547277414400\t1067144379561840640\t1067145828052156416\t1067214117025579008\t1067425351964532736\t1067435397746053122\t1067445442701611008\t1067540233741262848\t1067740959600009217\t1067759656615927808\t1067772844195618816\t1067807320262946816\t1068328458890039296\t1070060528641425410\t1070354919721439232\t1071082837334482944\t1071170615023992833\t1072154007076986880\t1072160826814054400\t1072458749330276352\t1072476936801316871\t1072482666493239296\t1072484362648723457\t1072488542419460102\t1072520911352463360\t1072566851362807808\t1072566851178299394\t1072591046423523330\t1072820367536857089\t1072821058103885825\t1072839127354228736\t1072852891755560960\t1072854068106854400\t1072902743201865728\t1072909945799098370\t1072932834556477440\t1072940427702083591\t1073381395836235777\t1073389855973863424\t1073390413354958848\t1073760021526310915\t1073760212371136512\t1073760309687578624\t1073762057860849664\t1073765582539972609\t1073766052230647808\t1073767381732864000\t1073767413160861696\t1073767715721101313\t1073768819317923840\t1073769851649437696\t1073771636535500800\t1073771899983781888\t1073772336560562183\t1073772508057284609\t1073772818939015168\t1073789506099138560\t1073790905365225472\t1073793840459395072\t1073819451865423872\t1073832300117409793\t1073845420185583616\t1073941332866031616\t1073966962655576064\t1073967388520038400\t1073973539693322245\t1073982188415262720\t1073984013621186561\t1074003934174429184\t1074004614117277701\t1074014925008326656\t1074015266827395074\t1074021559831011328\t1074024666950496261\t1074032219805622272\t1074033288107474946\t1074042931911233537\t1074058290521239552\t1074084461854277632\t1074087536551710722\t1074121296475275265
1,politifact12944,http://www.cq.com/doc/newsmakertranscripts-4942265?3,comments in Fayetteville NC,942953459\t8980098198\t16253717352\t16685132501\t18459384142\t20408136207\t22245152650\t76287470559444992\t91736060433928192\t167013050203914240\t202463039910461440\t231102720684220416\t303188036668243968\t327576133837139969\t428207972250968064\t550078280322543616\t552000999834271745\t575321511390679040\t605495719630225408\t608435381407739905\t620373874434183168\t663552087381274628\t709120156266397697\t710213578842836992\t791657074568622080\t791738101638897664\t801163013839581185\t811984896029822976\t865327039947173891\t868503315155496960\t887032957843771392\t956313411511554049\t962036861743984641\t970682643535802368\t979341616975634432\t992501849071775745\t1009814965111414784\t1017498833637576704\t1019271700817563649\t1026621333902372865\t1034574401067515909\t1041102754201124864\t1042451606065885184\t1045731999925915649\t1047885310200348674\t1050935364481941506\t1051547030668156928\t1055685980533989376\t1064862539220230144\t1065281573686259712\t1069018829693476864
2,politifact333,https://web.archive.org/web/20080204072132/http://www.rockymountainnews.com:80/news/2008/feb/02/romney-tells-denver-crowd-he-can-salvage-economy/,"Romney makes pitch, hoping to close deal : Elections : The Rocky Mountain News",
3,politifact4358,https://web.archive.org/web/20110811143753/http://www.dems.gov/press/democratic-leaders-say-house-democrats-are-united-against-gop-default-act,Democratic Leaders Say House Democrats Are United Against GOP Default Act,
4,politifact779,https://web.archive.org/web/20070820164107/http://www.whitehouse.gov/omb/budget/fy2008/outlook.html,"Budget of the United States Government, FY 2008",89804710374154240\t91270460595109888\t96039619300306944\t106486564980400129\t239664119966470145\t531736791594713089\t975594288800194561\t1049636292374528001


In [98]:
dfs_dict['gossipcop_fake'].head()

Unnamed: 0,id,news_url,title,tweet_ids
0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,284329075902926848\t284332744559968256\t284335412590297089\t284359918792880128\t284385385151332353\t284456388833452032\t284644878267318273\t284656558963564545\t284705391965794304\t284705391957401601\t284762425553723392\t284766089827721217\t284769766806257664\t284769769490620416\t284769775442337792\t284769772980301824\t284769778655186944\t284769777073917952\t284771913753042944\t284902943201837056\t284918939975303169\t285592266716884993\t285635421872979969\t285635516924301313\t285731324101287937\t295954533555793920\t296639632710459392\t296651276626825216\t700137152143699968\t700501642412560384\t700599457767890944\t700925213031141376\t757598980373045248\t757614017808437248\t757810204448743425\t757855454886293504\t818454872299163649\t818532208990371841\t818622382524997632\t818623644737871872\t824328059335692290\t840237926642536449\t840277988067430400\t840305556736872448\t840312641042104320\t840320383437877248\t840321378150670336\t840325449389600768\t840346792671698945\t840359115754999810\t840359140119699457\t840366459675062274\t840396881586982912\t840413370650050560\t840440847527116800\t840444299732910080\t840462550135365633\t841849268646338560\t849678041055887360\t925851191560556544\t927426728653230080\t927531012212457473\t927588875576528897\t927588875551363072\t927589121190760449\t927606510406029314\t928442019529162752\t928532613056352256\t928653424752758784\t953201937574342658\t953336408650993664\t953692192064884737\t953999445384888321\t954307670344916992\t1010039778891452416\t1010184150970654720\t1010184410115727361\t1010191444617752576\t1010229013908992000\t1010262428670709766\t1010334767701561344\t1010393809144799232\t1010418444016259072\t1011127764261396481\t1013723578834014208\t1013723601571340291\t1013723813685669889\t1016000562893123584\t1016431705132093440\t1036444296348663808\t1060594707447853058\t1060595446488358912\t1060679502240436224\t1060683397649952770\t1060708573590773761\t1060722227237318656\t1060812126200258560
1,gossipcop-4580247171,hollywoodlife.com/2018/05/05/paris-jackson-cara-delevingne-matching-outfits-night-out-nyc-dating-pic/,Paris Jackson & Cara Delevingne Enjoy Night Out In Matching Outfits: They Have ‘Amazing Chemistry’,992895508267130880\t992897935418503169\t992899529329569792\t992904299674923009\t992906330590597120\t992916937536516097\t992923845446328320\t992992387315060737\t993002463761453056\t993030134004248577\t993060047700676609\t993090513770180615\t993105619602227203\t993133763071651840\t993134760506675200
2,gossipcop-941805037,variety.com/2017/biz/news/tax-march-donald-trump-protest-1202031487/,Celebrities Join Tax March in Protest of Donald Trump,853359353532829696\t853359576543920128\t853359758400729088\t853360019814973440\t853360073564868608\t853360086479101952\t853360092174966784\t853360091411562496\t853360111670054913\t853360110843813888\t853360113872130048\t853360128501862400\t853360131999793152\t853360195149418496\t853360323981701120\t853360366302220288\t853360619352883200\t853360803646496769\t853361601281445890\t853362860830928897\t853363803848880128\t853364005624324098\t853366133638012929\t853366622190436352\t853367360777986048\t853367524565688320\t853367882004275200\t853368967108448257\t853369009592512516\t853369363038064640\t853369449457606657\t853369574552723456\t853369732589924352\t853370675913826304\t853371449351872512\t853371487360765952\t853371710820823040\t853371722292240384\t853371734732537856\t853372207912931328\t853372329669283840\t853372910211223552\t853373896937971712\t853375270555799552\t853375443751325696\t853375458766974976\t853378059067510784\t853380105376329733\t853383424068988928\t853383430758969344\t853388476410200064\t853388528474087424\t853394168613191680\t853396186157391872\t853399274939219968\t853406473489141761\t853462336371392513\t853467507511971845\t853471387113078784\t853516820069756928\t853518110518722560\t853546300222001154\t853556177111834624\t853560418400391169\t853561802633822208\t853561812419084288\t853563392707239937\t853563913161416704\t853563925299777540\t853565283432136706\t853566716101537792\t853566767293059073\t853567661535612928\t853582937144426496\t853586468182396928\t853615408066301954\t853648619337109504\t853705948287741956\t853748960594452480\t853771702798385152\t853792365835952128\t853805006511722497\t853806813107568640\t853910232447242241\t853960825173712896\t867415830727974917
3,gossipcop-2547891536,www.dailymail.co.uk/femail/article-3499192/Do-blondes-REALLY-fun-Cindy-Crawford-s-model-daughter-Kaia-Gerber-puts-theory-test-peroxide-wig-dining-Harry-Styles.html,Cindy Crawford's daughter Kaia Gerber wears a wig after dining with Harry Styles,988821905196158981\t988824206556172288\t988825130838077440\t988827402749919232\t988835411609845761\t988851564243800066\t988854637531992064\t988859447832334336\t988890321873395712\t988968654346825728\t989008902703534080\t989101743375114241\t989103880364613632\t990221524102713345
4,gossipcop-5476631226,variety.com/2018/film/news/list-2018-oscar-nominations-1202668757/,Full List of 2018 Oscar Nominations – Variety,955792793632432131\t955795063925301249\t955798007861170178\t955801005643452418\t955801452781408256\t955801716196347905\t955803176430657536\t955804615492210688\t955805286517833728\t955805289370013701\t955805861858852871\t955806284783251467\t955807062029717505\t955807741968347136\t955811182807605248\t955811991763013632\t955815165974294532\t955815369683492864\t955815782847524865\t955818359131484161\t955820617814630400\t955822571173195776\t955824207748845569\t955830124443713536\t955833485943558144\t955836296336986112\t955836645504413696\t955836645475045376\t955836645437247489\t955842212188839936\t955845345807912964\t955849698853773312\t955854366870827008\t955857618345619458\t955864532404097024\t955868902252199936\t955869558618951680\t955872387806920704\t955900348123762689\t955913024174469121\t955922790187597824\t955931807551193088\t955941036844568576\t955941360317751296\t956054980686905345\t956131095312965635\t956439663430062080\t956443719623958529\t956501140564250624\t956594492643135488\t956769381941260290\t956949166051250176\t957045409771343872\t957092190991323136\t957226518241009664\t957487299754975233\t957903405166440448\t958282715928940544\t966663844302020609\t969260265593327616\t969605172199657472\t969686533589610497\t970032900770029568\t970423438690127872\t1027289193964363779\t1037382776390070272


In [99]:
dfs_dict['tweets_gossipcop_fake'].merge(dfs_dict['gossipcop_fake'],left_on='news_id',right_on='id',how='left').head()

Unnamed: 0,news_id,tweet_id,text,label,id,news_url,title,tweet_ids
0,gossipcop-2493749932,284332744559968256,Did Miley Cyrus and Liam Hemsworth secretly get married? | In Case You Didn't Know http://t.co/daJAmp83,0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,284329075902926848\t284332744559968256\t284335412590297089\t284359918792880128\t284385385151332353\t284456388833452032\t284644878267318273\t284656558963564545\t284705391965794304\t284705391957401601\t284762425553723392\t284766089827721217\t284769766806257664\t284769769490620416\t284769775442337792\t284769772980301824\t284769778655186944\t284769777073917952\t284771913753042944\t284902943201837056\t284918939975303169\t285592266716884993\t285635421872979969\t285635516924301313\t285731324101287937\t295954533555793920\t296639632710459392\t296651276626825216\t700137152143699968\t700501642412560384\t700599457767890944\t700925213031141376\t757598980373045248\t757614017808437248\t757810204448743425\t757855454886293504\t818454872299163649\t818532208990371841\t818622382524997632\t818623644737871872\t824328059335692290\t840237926642536449\t840277988067430400\t840305556736872448\t840312641042104320\t840320383437877248\t840321378150670336\t840325449389600768\t840346792671698945\t840359115754999810\t840359140119699457\t840366459675062274\t840396881586982912\t840413370650050560\t840440847527116800\t840444299732910080\t840462550135365633\t841849268646338560\t849678041055887360\t925851191560556544\t927426728653230080\t927531012212457473\t927588875576528897\t927588875551363072\t927589121190760449\t927606510406029314\t928442019529162752\t928532613056352256\t928653424752758784\t953201937574342658\t953336408650993664\t953692192064884737\t953999445384888321\t954307670344916992\t1010039778891452416\t1010184150970654720\t1010184410115727361\t1010191444617752576\t1010229013908992000\t1010262428670709766\t1010334767701561344\t1010393809144799232\t1010418444016259072\t1011127764261396481\t1013723578834014208\t1013723601571340291\t1013723813685669889\t1016000562893123584\t1016431705132093440\t1036444296348663808\t1060594707447853058\t1060595446488358912\t1060679502240436224\t1060683397649952770\t1060708573590773761\t1060722227237318656\t1060812126200258560
1,gossipcop-2493749932,284335412590297089,Did Miley Cyrus and Liam Hemsworth secretly get married? http://t.co/KwgEiKOn,0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,284329075902926848\t284332744559968256\t284335412590297089\t284359918792880128\t284385385151332353\t284456388833452032\t284644878267318273\t284656558963564545\t284705391965794304\t284705391957401601\t284762425553723392\t284766089827721217\t284769766806257664\t284769769490620416\t284769775442337792\t284769772980301824\t284769778655186944\t284769777073917952\t284771913753042944\t284902943201837056\t284918939975303169\t285592266716884993\t285635421872979969\t285635516924301313\t285731324101287937\t295954533555793920\t296639632710459392\t296651276626825216\t700137152143699968\t700501642412560384\t700599457767890944\t700925213031141376\t757598980373045248\t757614017808437248\t757810204448743425\t757855454886293504\t818454872299163649\t818532208990371841\t818622382524997632\t818623644737871872\t824328059335692290\t840237926642536449\t840277988067430400\t840305556736872448\t840312641042104320\t840320383437877248\t840321378150670336\t840325449389600768\t840346792671698945\t840359115754999810\t840359140119699457\t840366459675062274\t840396881586982912\t840413370650050560\t840440847527116800\t840444299732910080\t840462550135365633\t841849268646338560\t849678041055887360\t925851191560556544\t927426728653230080\t927531012212457473\t927588875576528897\t927588875551363072\t927589121190760449\t927606510406029314\t928442019529162752\t928532613056352256\t928653424752758784\t953201937574342658\t953336408650993664\t953692192064884737\t953999445384888321\t954307670344916992\t1010039778891452416\t1010184150970654720\t1010184410115727361\t1010191444617752576\t1010229013908992000\t1010262428670709766\t1010334767701561344\t1010393809144799232\t1010418444016259072\t1011127764261396481\t1013723578834014208\t1013723601571340291\t1013723813685669889\t1016000562893123584\t1016431705132093440\t1036444296348663808\t1060594707447853058\t1060595446488358912\t1060679502240436224\t1060683397649952770\t1060708573590773761\t1060722227237318656\t1060812126200258560
2,gossipcop-2493749932,284359918792880128,Afternoon Tea – Daily Link Roundup: Did Miley Cyrus and Liam Hemsworth secretly get married? – Celebitchy Mila K... http://t.co/vuJL1TB4,0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,284329075902926848\t284332744559968256\t284335412590297089\t284359918792880128\t284385385151332353\t284456388833452032\t284644878267318273\t284656558963564545\t284705391965794304\t284705391957401601\t284762425553723392\t284766089827721217\t284769766806257664\t284769769490620416\t284769775442337792\t284769772980301824\t284769778655186944\t284769777073917952\t284771913753042944\t284902943201837056\t284918939975303169\t285592266716884993\t285635421872979969\t285635516924301313\t285731324101287937\t295954533555793920\t296639632710459392\t296651276626825216\t700137152143699968\t700501642412560384\t700599457767890944\t700925213031141376\t757598980373045248\t757614017808437248\t757810204448743425\t757855454886293504\t818454872299163649\t818532208990371841\t818622382524997632\t818623644737871872\t824328059335692290\t840237926642536449\t840277988067430400\t840305556736872448\t840312641042104320\t840320383437877248\t840321378150670336\t840325449389600768\t840346792671698945\t840359115754999810\t840359140119699457\t840366459675062274\t840396881586982912\t840413370650050560\t840440847527116800\t840444299732910080\t840462550135365633\t841849268646338560\t849678041055887360\t925851191560556544\t927426728653230080\t927531012212457473\t927588875576528897\t927588875551363072\t927589121190760449\t927606510406029314\t928442019529162752\t928532613056352256\t928653424752758784\t953201937574342658\t953336408650993664\t953692192064884737\t953999445384888321\t954307670344916992\t1010039778891452416\t1010184150970654720\t1010184410115727361\t1010191444617752576\t1010229013908992000\t1010262428670709766\t1010334767701561344\t1010393809144799232\t1010418444016259072\t1011127764261396481\t1013723578834014208\t1013723601571340291\t1013723813685669889\t1016000562893123584\t1016431705132093440\t1036444296348663808\t1060594707447853058\t1060595446488358912\t1060679502240436224\t1060683397649952770\t1060708573590773761\t1060722227237318656\t1060812126200258560
3,gossipcop-2493749932,284385385151332353,Did Miley Cyrus and Liam Hemsworth secretly get married? http://t.co/KJnBxgvk,0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,284329075902926848\t284332744559968256\t284335412590297089\t284359918792880128\t284385385151332353\t284456388833452032\t284644878267318273\t284656558963564545\t284705391965794304\t284705391957401601\t284762425553723392\t284766089827721217\t284769766806257664\t284769769490620416\t284769775442337792\t284769772980301824\t284769778655186944\t284769777073917952\t284771913753042944\t284902943201837056\t284918939975303169\t285592266716884993\t285635421872979969\t285635516924301313\t285731324101287937\t295954533555793920\t296639632710459392\t296651276626825216\t700137152143699968\t700501642412560384\t700599457767890944\t700925213031141376\t757598980373045248\t757614017808437248\t757810204448743425\t757855454886293504\t818454872299163649\t818532208990371841\t818622382524997632\t818623644737871872\t824328059335692290\t840237926642536449\t840277988067430400\t840305556736872448\t840312641042104320\t840320383437877248\t840321378150670336\t840325449389600768\t840346792671698945\t840359115754999810\t840359140119699457\t840366459675062274\t840396881586982912\t840413370650050560\t840440847527116800\t840444299732910080\t840462550135365633\t841849268646338560\t849678041055887360\t925851191560556544\t927426728653230080\t927531012212457473\t927588875576528897\t927588875551363072\t927589121190760449\t927606510406029314\t928442019529162752\t928532613056352256\t928653424752758784\t953201937574342658\t953336408650993664\t953692192064884737\t953999445384888321\t954307670344916992\t1010039778891452416\t1010184150970654720\t1010184410115727361\t1010191444617752576\t1010229013908992000\t1010262428670709766\t1010334767701561344\t1010393809144799232\t1010418444016259072\t1011127764261396481\t1013723578834014208\t1013723601571340291\t1013723813685669889\t1016000562893123584\t1016431705132093440\t1036444296348663808\t1060594707447853058\t1060595446488358912\t1060679502240436224\t1060683397649952770\t1060708573590773761\t1060722227237318656\t1060812126200258560
4,gossipcop-2493749932,284456388833452032,"""Did Miley Cyrus and Liam Hemsworth secretly get married?"" http://t.co/2rqbmSJn",0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,284329075902926848\t284332744559968256\t284335412590297089\t284359918792880128\t284385385151332353\t284456388833452032\t284644878267318273\t284656558963564545\t284705391965794304\t284705391957401601\t284762425553723392\t284766089827721217\t284769766806257664\t284769769490620416\t284769775442337792\t284769772980301824\t284769778655186944\t284769777073917952\t284771913753042944\t284902943201837056\t284918939975303169\t285592266716884993\t285635421872979969\t285635516924301313\t285731324101287937\t295954533555793920\t296639632710459392\t296651276626825216\t700137152143699968\t700501642412560384\t700599457767890944\t700925213031141376\t757598980373045248\t757614017808437248\t757810204448743425\t757855454886293504\t818454872299163649\t818532208990371841\t818622382524997632\t818623644737871872\t824328059335692290\t840237926642536449\t840277988067430400\t840305556736872448\t840312641042104320\t840320383437877248\t840321378150670336\t840325449389600768\t840346792671698945\t840359115754999810\t840359140119699457\t840366459675062274\t840396881586982912\t840413370650050560\t840440847527116800\t840444299732910080\t840462550135365633\t841849268646338560\t849678041055887360\t925851191560556544\t927426728653230080\t927531012212457473\t927588875576528897\t927588875551363072\t927589121190760449\t927606510406029314\t928442019529162752\t928532613056352256\t928653424752758784\t953201937574342658\t953336408650993664\t953692192064884737\t953999445384888321\t954307670344916992\t1010039778891452416\t1010184150970654720\t1010184410115727361\t1010191444617752576\t1010229013908992000\t1010262428670709766\t1010334767701561344\t1010393809144799232\t1010418444016259072\t1011127764261396481\t1013723578834014208\t1013723601571340291\t1013723813685669889\t1016000562893123584\t1016431705132093440\t1036444296348663808\t1060594707447853058\t1060595446488358912\t1060679502240436224\t1060683397649952770\t1060708573590773761\t1060722227237318656\t1060812126200258560


In [100]:
#fake distribution

dfs_dict['gossipcop_fake']['label']=0
dfs_dict['gossipcop_real']['label']=1
dfs_dict['politifact_fake']['label']=0
dfs_dict['politifact_real']['label']=1

news_dfs = [dfs_dict['gossipcop_fake'],dfs_dict['gossipcop_real'],dfs_dict['politifact_fake'],dfs_dict['politifact_real']]

complete_news_df = pd.concat(news_dfs)


In [101]:
complete_news_df.sample(5)

Unnamed: 0,id,news_url,title,tweet_ids,label
4861,gossipcop-9720326723,www.refinery29.com/2017/08/169831/bella-hadid-victorias-secret-fashion-show-2017,Bella Hadid Will Be Walking This Year's Victoria's Secret Fashion Show,935990354058338304\t936000963147632640\t936107543830646785\t936296631191838720,0
16415,gossipcop-921793,https://calgarysun.com/entertainment/celebrity/how-blake-shelton-and-kelly-clarkson-made-adam-levine-cry/wcm/7406b6ae-d592-405e-9075-514eb9af11d9,How Blake Shelton and Kelly Clarkson made Adam Levine cry,976083707596623873\t976083954066485249\t976084866130472962\t976085225984987137\t976085415861084160\t976086067060211712\t976086845103652864\t976087525289791488\t976087949292048385\t976089318262497281\t976091886145458176\t976092268955348993\t976096158471938049\t976096422859804672\t976097455648501761\t976107368697118721\t976107476964737024\t976116498539433984\t976269297847644161\t976331628975984641\t976343778532315137\t976694888539611136\t977243542652297216,1
8476,gossipcop-901374,https://www.etonline.com/rob-dyrdek-welcomes-baby-girl-wife-bryiana-find-out-her-name-93314,Rob Dyrdek Welcomes Baby Girl With Wife Bryiana -- Find Out Her Name!,946902401935699968\t946904204450377728\t946905207925100545\t946905458274656256\t946905651506241536\t946905688537812992\t946905998844944384\t946906007514513409\t946906782944956417\t946908731274510336\t946908807778664448\t946908902238703616\t946909038700359682\t946910191999598592\t946912068225130496\t946918445626556416\t946925128822247424\t946930020836102144\t946932844756074496\t946937612752900097\t946942786770792448\t946946567155671040\t946948724130906113\t946950238354464768\t946969207476031489\t946970278831194115\t946971991583498241\t946988043285794816\t946988157802893313\t946988181903245314\t946988271988600834\t946988295569006592\t946988301755600896\t946988298043645952\t946988305761087488\t946988355224555520\t946988380637859840\t946988400640516102\t946988499168833536\t946988511110094848\t946988509319049217\t946988553019510784\t946988557415145472\t946988595856003072\t946988741750591488\t946988773975445504\t946988803654287360\t946988808595152896\t946988840023199744\t946989006486687744\t946989020038475777\t946989085746434048\t946989160484786176\t946989277413625856\t946990996138668033\t946991000286789632\t946991667353075717\t947006539621072897\t947051279297253376\t947067605642305536\t947067620624359429\t947068600300523520\t947160758919532544\t947164923770966016\t947164927013167104\t947164926971338752\t947164926774206464\t947165204827246595\t947168695272407040\t947170938914709504\t947170984028667904\t947192729473339393\t947198252004331522\t947318221610287104\t947364467813187584\t947816586068377600\t947842917930029056,1
11873,gossipcop-905253,https://www.npr.org/sections/thetwo-way/2018/01/10/576994537/james-franco-denies-sexual-misconduct-claims-saying-theyre-not-accurate,"James Franco Denies Sexual Misconduct Claims, Saying They're 'Not Accurate'",950960477311512576\t950964715261345792\t950973627905736704\t950984002441240576\t950998666982047745\t950998697843736576\t950998711261433857\t951000595233566720\t951010242921295873\t951012033805217792\t951012829775257600\t951014155049406464\t951015368180555776\t951015950966558720\t951016317200498688\t951017051455459328\t951017186885296128\t951017195785674754\t951018015448952832\t951018861809291265\t951019782018650112\t951046840279060480\t951054354483699712\t951054588962136064\t951054656532287489\t951054765290438656\t951054765047234565\t951054793308561408\t951055505522380800\t951055659029671936\t951055781281099777\t951057394221703168\t951061410666176513\t951065205768847360\t951065791184752640\t951066652031975424\t951069658957918210\t951070227495714816\t951070653372911616\t951071149521162240\t951071151429640192\t951071158245322752\t951071156928315393\t951071156232007681\t951071155384799232\t951071169989390337\t951071168982745090\t951071175353880577\t951072915029557248\t951072913377058818\t951072921719468032\t951072921081888768\t951072920914153473\t951072928749142017\t951072926974951425\t951072925855113216\t951072925624418304\t951072925246828544\t951072933857734658\t951072938664456192\t951073167229030401\t951074420314972160\t951074425725640705\t951074425352286208\t951074947165691904\t951076475691663360\t951077215495585792\t951079940891426816\t951079993500733440\t951081088318951424\t951081824469561345\t951082466911182849\t951082976275709952\t951083662048661504\t951084853604139010\t951086654319742976\t951086793788657664\t951088152852205568\t951088594759946244\t951090200473669634\t951092577553432576\t951095111215181825\t951095200193089537\t951095543161344000\t951097151383248897\t951097590615076865\t951097990684532736\t951098853847195648\t951099575594561536\t951099870424780800\t951100951796092928\t951101343225151490\t951101342663266304\t951101588461977600\t951101628853211136\t951102156244856834\t951102422008647681\t951102433392037888\t951102736195641345\t951102740041814019\t951102869662396416\t951103351843799041\t951103424233394177\t951103434169733120\t951103441769848832\t951103542151938053\t951104126108291072\t951104401300688897\t951105076457811968\t951105106145103872\t951105426849923072\t951105632983162881\t951105637307445248\t951105753103982592\t951107375297171456\t951107587822538752\t951107600606748672\t951108502461800449\t951108516349186050\t951108523731177483\t951110303659511810\t951110308701097984\t951110352502214656\t951111940201476097\t951112856975675392\t951113225269055494\t951113779663732737\t951114126893441025\t951114131821785088\t951114879255138307\t951115545742659585\t951115625857986560\t951116155598659584\t951118347311095808\t951118526814699521\t951118983658459136\t951120474376941568\t951120498422824960\t951121650908585984\t951124177762816000\t951126276013772800\t951128322666196992\t951128324134203392\t951129812336340992\t951132549127876608\t951134170192920576\t951136076315484162\t951136921962582016\t951141206678945792\t951144112673128449\t951146132167712769\t951147599498592256\t951149764334120960\t951150778567352320\t951151915072671744\t951152298239066112\t951154223760240646\t951154295461875712\t951154781409685509\t951155070938296320\t951155438812311557\t951155471951446018\t951155824868573185\t951155932767105024\t951157242174242816\t951158415606370305\t951159255423422464\t951159419190022144\t951159600794882048\t951160599232172032\t951164093544747008\t951164204735713280\t951164555828387840\t951164871563010050\t951164872439549953\t951164999027888129\t951166541264977921\t951166619891503104\t951166646764408834\t951167026680270848\t951168218588176389\t951168940029501441\t951169911606403073\t951170333641494528\t951170553313988608\t951170565242580992\t951170755114553346\t951171163643924480\t951171250721665024\t951171384180396032\t951171691614367744\t951171690641350656\t951171693598224385\t951172326715932672\t951172650604351488\t951172784922726401\t951174507615739904\t951176899555979264\t951179647185768448\t951179873682411520\t951180594167369728\t951181749110927360\t951184138836561925\t951188068438040578\t951188865959825409\t951189365694197760\t951194842239045633\t951195462652055552\t951195479248900096\t951196860814827520\t951197023809699841\t951197132366733312\t951197509271146496\t951198035421421568\t951198139096227840\t951199512130998274\t951200113195716608\t951201815051362304\t951202771356868609\t951206918189731841\t951207106845331459\t951207978471800833\t951208292260425728\t951209535733190658\t951209708626595842\t951211950997819397\t951213158345175045\t951219351184662529\t951219607091732481\t951223174053662720\t951224637005946881\t951227154431389696\t951228156106682368\t951230916604567553\t951234448980406272\t951234703117307904\t951240266224082944\t951244299705831424\t951246843895152640\t951249147994124288\t951249822039801856\t951253789310439425\t951255893567516672\t951256167522607105\t951264901112127488\t951268039693078528\t951268680465297409\t951271181033844736\t951272453988388864\t951272641641447424\t951274916837502977\t951285061491437569\t951292535825190913\t951304213048410112\t951304394586427392\t951305003548991488\t951305187775393792\t951307685680549888\t951312660812804102\t951317747614584833\t951321600531542017\t951325386876641280\t951325386255806464\t951331437822832640\t951349117174902785\t951355499450191872\t951363048702439426\t951363054519947264\t951363232484286466\t951363230571606016\t951363262637060097\t951363861877227520\t951364242145468416\t951364240367120384\t951367374258794496\t951380667023183872\t951409104353931265\t951420315615232000\t951420981041684480\t951424521453621250\t951456332435075072\t951459550862938112\t951461202961584128\t951476372492488709\t951488897690820609\t951488897636294658\t951491036899033088\t951491038912290818\t951494146115735552\t951494557249822720\t951495355757137920\t951510542404988928\t951530154085441536\t951538231417417728\t951616504260423680\t951622913605689344\t951676474255466497\t951814717454016512\t951969288042119168\t952178102024601601\t952241330922471424\t952299285189709824\t952334357859102732\t952350604143333376\t952374819051491329\t952434355808346112\t952911086537277443\t952980008338849794\t953269456532332545\t953582931834015744\t954625943976529921,1
11115,gossipcop-952908,https://www.usatoday.com/story/money/2018/07/18/lays-tastes-america-new-flavors-launch-test/788797002/,Fried-pickle potato chips? We taste tested Frito Lay's region-themed snacks,1019909742280441857,1


In [102]:
px.bar(complete_news_df['label'].value_counts(),labels={'index':'News label', 'value':'Number of News'})

In [103]:
def count_number_of_tweets(string_object):
    try:
        return string_object.split('\t').__len__()
    except:
        return 0 
    

def count_number_of_words(string_object):
    try:
        return string_object.split().__len__()
    except:
        return 0 

In [104]:
complete_news_df['number_of_tweets'] = complete_news_df['tweet_ids'].apply(count_number_of_tweets)

In [105]:
complete_news_df['number_of_tweets'].sum()

2063442

In [106]:
complete_news_df.drop(columns=['tweet_ids'],inplace=True)

In [107]:


def get_domain(url):
    try:
        if not url.startswith('http'):
            url= 'http://' + url
        return urlparse(url).netloc
    except:
        return np.nan

In [108]:
complete_news_df['domain'] = complete_news_df['news_url'].apply(get_domain)

In [109]:
complete_news_df['Title Lenght'] = complete_news_df['title'].apply(count_number_of_words)
complete_news_df['Title Lenght'].head()

0     9
1    15
2     9
3    13
4     8
Name: Title Lenght, dtype: int64

In [110]:

group_1 = complete_news_df[complete_news_df['label']==0]['Title Lenght'].values
group_2 = complete_news_df[complete_news_df['label']==1]['Title Lenght'].values
X = [group_1,group_2]
group_labels = [f'Fake Article Titles Lenght',f'True Article Titles Lenght']
fig = ff.create_distplot(X,group_labels=group_labels,show_hist=False)
fig.show()

In [111]:
complete_news_df.head()

Unnamed: 0,id,news_url,title,label,number_of_tweets,domain,Title Lenght
0,gossipcop-2493749932,www.dailymail.co.uk/tvshowbiz/article-5874213/Did-Miley-Cyrus-Liam-Hemsworth-secretly-married.html,Did Miley Cyrus and Liam Hemsworth secretly get married?,0,97,www.dailymail.co.uk,9
1,gossipcop-4580247171,hollywoodlife.com/2018/05/05/paris-jackson-cara-delevingne-matching-outfits-night-out-nyc-dating-pic/,Paris Jackson & Cara Delevingne Enjoy Night Out In Matching Outfits: They Have ‘Amazing Chemistry’,0,15,hollywoodlife.com,15
2,gossipcop-941805037,variety.com/2017/biz/news/tax-march-donald-trump-protest-1202031487/,Celebrities Join Tax March in Protest of Donald Trump,0,86,variety.com,9
3,gossipcop-2547891536,www.dailymail.co.uk/femail/article-3499192/Do-blondes-REALLY-fun-Cindy-Crawford-s-model-daughter-Kaia-Gerber-puts-theory-test-peroxide-wig-dining-Harry-Styles.html,Cindy Crawford's daughter Kaia Gerber wears a wig after dining with Harry Styles,0,14,www.dailymail.co.uk,13
4,gossipcop-5476631226,variety.com/2018/film/news/list-2018-oscar-nominations-1202668757/,Full List of 2018 Oscar Nominations – Variety,0,66,variety.com,8


In [21]:
#temp_df = pd.pivot_table(complete_news_df,columns=['label','domain'],aggfunc='count',fill_value=0)
temp_df = complete_news_df.groupby(['label','domain']).size()
temp_df = temp_df.unstack(fill_value=0).stack()
temp_df = temp_df.rename('Count').reset_index().fillna(0)
total_df = temp_df.groupby('domain').sum().rename(columns={'Count':'Total'}).reset_index()
total_df
temp_df = temp_df.merge(total_df
              [['domain','Total']],
              left_on='domain',
              right_on='domain',
              how='left')
temp_df.sort_values(by=['Total'],ascending=False,inplace=True)
px.bar(temp_df,x='domain',color='label',y='Count',height=600)

In [114]:
temp_df['Frequency'] = temp_df['Count']/temp_df['Total']
temp_df = temp_df.merge(temp_df[temp_df['label']==1][['domain','Frequency']].rename(columns={'Frequency':'FrequencyPositive'}),left_on='domain',right_on='domain',how='left')
temp_df = temp_df.merge(temp_df[temp_df['label']==0][['domain','Frequency']].rename(columns={'Frequency':'FrequencyNegative'}),left_on='domain',right_on='domain',how='left')
temp_df.head(10)

Unnamed: 0,label,domain,Count,Total,Frequency,FrequencyPositive_x,FrequencyNegative_x,FrequencyPositive_y,FrequencyNegative_y,FrequencyPositive,FrequencyNegative
0,1,www.eonline.com,0,154,0.0,0.0,1.0,0.0,1.0,0.0,1.0
1,1,www.imdb.com,0,145,0.0,0.0,1.0,0.0,1.0,0.0,1.0
2,1,www.nationalenquirer.com,0,53,0.0,0.0,1.0,0.0,1.0,0.0,1.0
3,1,yournewswire.com,0,25,0.0,0.0,1.0,0.0,1.0,0.0,1.0
4,1,www.celebitchy.com,0,21,0.0,0.0,1.0,0.0,1.0,0.0,1.0
5,1,www.celebdirtylaundry.com,0,21,0.0,0.0,1.0,0.0,1.0,0.0,1.0
6,1,www.pressreader.com,0,20,0.0,0.0,1.0,0.0,1.0,0.0,1.0
7,1,starmagazine.com,0,16,0.0,0.0,1.0,0.0,1.0,0.0,1.0
8,1,www.dailymotion.com,0,12,0.0,0.0,1.0,0.0,1.0,0.0,1.0
9,1,www.magzter.com,0,10,0.0,0.0,1.0,0.0,1.0,0.0,1.0


In [115]:
temp_df.sort_values(['FrequencyNegative','label'],inplace=True,ascending=False)
REFERENCES = 15
px.bar(temp_df[temp_df['Total']>REFERENCES],x='domain',color='label',y='Frequency',height=600,title=f'Websites having more than {REFERENCES} references')

In [116]:
temp_df.sort_values(['FrequencyNegative','label'],inplace=True,ascending=False)
px.bar(temp_df[temp_df['Total']<REFERENCES],x='domain',color='label',y='Frequency',height=600,title=f'Websites having less than {REFERENCES} references')

In [117]:
# density of fake news in sites where total less or equal 30
# density of fake news in sites where total more than 30

DIVIDER = 50
group_1 = temp_df[temp_df['Total']>DIVIDER]['FrequencyNegative'].values
group_2 = temp_df[temp_df['Total']<=DIVIDER]['FrequencyNegative'].values
X = [group_1,group_2]
group_labels = [f'Sites having more than {DIVIDER} references',f'Sites having less than {DIVIDER} references']
fig = ff.create_distplot(X,group_labels=group_labels,show_hist=False)
fig.update_layout(title='Negative Frequency')
fig.show()

In [118]:
px.scatter(temp_df[temp_df['label']==1],x='Total',y='FrequencyNegative',opacity=0.2)

# Creating data processing pipeline

In [149]:

nltk.download('stopwords')
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))

def preprocess_text_for_column(dataframe,column):
    # remove twitter links
    dataframe[column]  = dataframe[column].apply(lambda x: re.sub(r'http\S+', '', x))
    # remove punctuation
    dataframe[column] = dataframe[column].str.replace('[{}]'.format(string.punctuation), '')
    # remove special characters
    dataframe[column] = dataframe[column].str.replace('[^a-zA-Z0-9\s]', '')
    # remove whitespaces
    dataframe[column] = dataframe[column].str.replace(r'\s+', ' ', regex=True)
    # additionaly remove line start and end spaces
    dataframe[column] = dataframe[column].str.strip()
    # remove stopwords
    dataframe[column] = dataframe[column].apply(lambda x: ' '.join([word for word in x.split() if word.lower() not in stop_words]))
    #removing awkward photo links
    dataframe[column] = dataframe[column].apply(lambda x: ' '.join([word for word in x.split() if not word.startswith('pictwittercom')]))
    # stem text
    # dataframe[column] = dataframe[column].apply(lambda x: ' '.join([stemmer.stem(word) for word in x.split()]))
    # make lowercase
    dataframe[column] = dataframe[column].str.lower()
    return dataframe[column]

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vladp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [120]:
complete_news_df['TitleProcessed'] = preprocess_text_for_column(complete_news_df,'title')


The default value of regex will change from True to False in a future version.


The default value of regex will change from True to False in a future version.



In [122]:
complete_news_df['TitleProcessed'].sample(20)

16780    real housewives mysteries stolen homes mystery lovers friendshipending fights rocked franchise
15577                                             romeo santos proudly unveils wax figure fans petition
15130                                         happens abbey recap murrays slut marissas cheater edition
2252                                                   aritzia says meghan markle boosted earnings last
10072                                                            meghan trainor planning winter wedding
6754                                                        wags atlanta season 1 episode 2 dating game
102                                    selena gomez justin theroux may dating jennifer aniston freaking
748                               cw star caity lotz fights back online body shamers empowering message
8985                                              meanings behind kanye kim kardashian wests kids names
16414                                                          s

In [123]:
from collections import Counter

def count_words_frequency(dataframe,column):
    words = [word for title in dataframe[column] for word in title.split()]
    words_counter = Counter(words)
    words_df = pd.DataFrame(words_counter.items(),columns=['Word','Frequency'])
    return words_df

In [124]:
def create_dataframe_with_statistical_values_for_words(dataframe,columnWithWords):
    real_words_frequency = count_words_frequency(dataframe[dataframe['label']==1],columnWithWords)
    real_words_frequency['label'] = 1

    fake_words_frequency = count_words_frequency(dataframe[dataframe['label']==0],columnWithWords)
    fake_words_frequency['label'] = 0

    complete_freqs_dataframe = pd.concat([real_words_frequency,fake_words_frequency])
    temp_freq_dataframe = complete_freqs_dataframe[['Word','Frequency']].groupby('Word').sum().reset_index().rename(columns={'Frequency':'Total'})
    complete_freqs_dataframe = complete_freqs_dataframe.merge(temp_freq_dataframe,left_on='Word',right_on='Word',how='left')

    complete_freqs_dataframe['FrequencyRelative'] = complete_freqs_dataframe['Frequency']/complete_freqs_dataframe['Total']
    complete_freqs_dataframe = complete_freqs_dataframe.merge(complete_freqs_dataframe[complete_freqs_dataframe['label']==1][['Word','FrequencyRelative']].rename(columns={'FrequencyRelative':'FrequencyPositive'}),left_on='Word',right_on='Word',how='left')
    complete_freqs_dataframe = complete_freqs_dataframe.merge(complete_freqs_dataframe[complete_freqs_dataframe['label']==0][['Word','FrequencyRelative']].rename(columns={'FrequencyRelative':'FrequencyNegative'}),left_on='Word',right_on='Word',how='left')
    complete_freqs_dataframe.fillna(0,inplace=True)

    return complete_freqs_dataframe

In [125]:
complete_news_df[complete_news_df['id'].str.startswith('polit')]['label'].value_counts()

1    624
0    432
Name: label, dtype: int64

In [126]:
complete_freqs_for_news_polit = create_dataframe_with_statistical_values_for_words(complete_news_df[complete_news_df['id'].str.startswith('polit')],'TitleProcessed')

In [127]:
complete_freqs_for_news_polit.sample(2)

Unnamed: 0,Word,Frequency,label,Total,FrequencyRelative,FrequencyPositive,FrequencyNegative
1724,women,1,1,7,0.142857,0.142857,0.857143
3516,ended,1,0,1,1.0,0.0,1.0


In [128]:
px.bar(complete_freqs_for_news_polit.sort_values(['Total','label'],ascending=False),x='Word',y='Frequency',color='label')

In [130]:
px.bar(complete_freqs_for_news_polit[complete_freqs_for_news_polit['Total']>10].sort_values(['FrequencyNegative','label'],ascending=False),x='Word',y='FrequencyRelative',color='label')

In [131]:
complete_freqs_for_news_celeb = create_dataframe_with_statistical_values_for_words(complete_news_df[complete_news_df['id'].str.startswith('gos')],'TitleProcessed')

In [132]:
px.bar(complete_freqs_for_news_celeb.sort_values(['Total','label'],ascending=False),x='Word',y='Frequency',color='label')

In [135]:
px.bar(complete_freqs_for_news_celeb[complete_freqs_for_news_celeb['Total']>200].sort_values(['FrequencyNegative','label'],ascending=False),x='Word',y='FrequencyRelative',color='label')

In [136]:

dfs_dict['tweets_gossipcop_fake']['label']=0
dfs_dict['tweets_gossipcop_real']['label']=1
dfs_dict['tweets_politifact_fake']['label']=0
dfs_dict['tweets_politifact_real']['label']=1

news_dfs = [dfs_dict['tweets_gossipcop_fake'],dfs_dict['tweets_gossipcop_real'],dfs_dict['tweets_politifact_fake'],dfs_dict['tweets_politifact_real']]

complete_tweets_df = pd.concat(news_dfs)

In [137]:
complete_tweets_df.sample(10)

Unnamed: 0,news_id,tweet_id,text,label
92437,politifact13854,839178592508608512,RT SnopesVideo: Fact Check: FBI Issues Warrant for Obama’s Arrest After Confirming Illegal Trump Tower Wiretap?F… https://t.co/EE5bGX6org,0
606128,gossipcop-888389,1019251335110320132,MAMMA MIA! HERE WE GO AGAIN (2018) | Behind the Scenes of Musical Movie https://t.co/pVvb9NlM2C,1
561578,gossipcop-846462,857605902035111937,Robert De Niro Makes His Snapchat Debut and It's as Amazing as You'd Think https://t.co/NeB9RH9AvD,1
403263,politifact8629,805730068974759936,@CrockettWDSU you're up early again. No rest for the weary,1
120984,gossipcop-924126,979562159339384832,"Denim Trends Sienna Miller, Bella Hadid and More Are Wearing This Spring https://t.co/sMASM50FYw",1
386350,gossipcop-860652,875108679485411330,TV Scoop Awards: Vote for the Best Drama Actor and Actress https://t.co/GXaarC5SQV,1
62774,gossipcop-794053,775546915811233793,Ryan Lochte Gets Rushed by Protesters After Dancing With the Stars Performance - E! Online: E! OnlineRyan Loc... https://t.co/zdMJHJWvt8,1
166944,gossipcop-952758,1019603257667276807,"The Dark Knight Turns 10: Remembering Heath Ledger's Iconic Role One of the most memorable lines from The Dark Knight comes from Harvey Dent: ""You either die a hero, or you live long enough to see yourself become the villain."" But the Two-Face...https://t.co/F9ipW7pXqz",1
381181,gossipcop-936991,997893413180723200,"The Greatest Moments From the Royal Wedding, From the Smiling Page Boy to Candid Celebrity Reactions May 19, a special day. Prince Harry and Meghan Markle tied the knot at St. George's Chapel at Windsor Castle and there was no shortage of special moments… https://t.co/cADiWY6Prd",1
160801,politifact439,984072011214442497,Mark Zuckerberg is beginning his testimony before the House Energy and Commerce Committee. His opening remarks are pretty much the same as yesterday at the senate.,1


In [139]:
complete_tweets_df.shape

(1716351, 4)

In [140]:
complete_tweets_df['TextCleared'] = preprocess_text_for_column(complete_tweets_df,'text')


The default value of regex will change from True to False in a future version.


The default value of regex will change from True to False in a future version.



In [141]:
del complete_tweets_df['text']

In [142]:
complete_tweets_df.sample(10)

Unnamed: 0,news_id,tweet_id,label,TextCleared
389232,gossipcop-937243,997956194894462977,1,adele sends prince harry touching message princess diana royal wedding
492262,gossipcop-926287,983571117066637312,1,prince harry meghan markles engagement photographer spills details joyful shoot sometimes picture worth thousand words sometimes picture catapults career another stratosphere alexi lubomirski adept shutterbug w
468113,gossipcop-927561,985802053728591874,1,john cena nikki bella split six years
306404,gossipcop-954167,1021520462214586368,1,stormy daniels husband glendon crain files divorce stormy daniels glendon crain going separate ways e news confirm crain musician former adult star filed divorce daniels july 18 attorney
129787,gossipcop-1686501961,1055160177320701952,0,hey todayshow remember clues matt lauer big jerk nothing years well long take realize megyn kelly even bigger ignorant jerk finally fire everybodytalks
106347,gossipcop-534111552,969195920138088449,0,cual doping lo dijo el reportero
158706,gossipcop-2710044770,1036829136986230784,0,kendall jenner e super sapatao ta mentindo pra ela mesmo
69376,politifact7618,1054837368006631424,1,cnn lead w jake tapper 102318 cnn president trump news today october 23 2018
179527,gossipcop-3029477153,239815736921575424,0,brad pitt 8220livid8221 angelina jolie fight maddox8217s dirt bike
126347,gossipcop-9520738949,1036792990163976194,0,guess kimmy jimmy arent together anymore bb found kevin costner lady bettercallsaul


In [144]:
complete_tweets_df.nunique()

news_id          22959
tweet_id       1435836
label                2
TextCleared     828021
dtype: int64

In [145]:
complete_tweets_df.drop_duplicates(subset=['TextCleared'],inplace=True)

In [148]:
complete_tweets_df.shape

(828021, 4)

In [147]:
complete_tweets_df.sample(10)

Unnamed: 0,news_id,tweet_id,label,TextCleared
435797,gossipcop-905170,951098553480503297,1,hugh grant 57 expecting fifth child seven years girlfriend anna eberstein pictwittercomk2ldsmp2x7
288023,gossipcop-9495394238,1036451586514477056,0,iraq war war based lies millions murdered lets think man cute sudden gives candy mobama
440130,gossipcop-850207,868175742365908992,1,pictures pippa middleton marries james matthewsthe duchess cambridges sister pippa middleton married financier james matth
583509,gossipcop-897766,937470534358482945,1,ed sheeran says beyonce changes email address every week pictwittercom3zfq9tfwjl
165946,gossipcop-2029753949,1037369898647912448,0,apologizing bullshit like back tracking media relationships kris jenner shit annoying
476880,gossipcop-876873,902998421874900992,1,kenya moore fires back haters mocked marriage enough enough kenya moore vowed fight back haters
26614,gossipcop-1716370499,1043886157640216581,0,interesting went jamie dornans tag theres posts way btw even though dakota johnsons movie premiere yesterday go tags jd almost like fans trying take ride success pathetic
174716,politifact12104,1021373615164534787,1,every eatsmart bathroomscale four high precision sensors one feet suggest placing scale hard flat surface always avoid carpet optimal surfaces include ceramic porcelain tile hard wood concrete digitalscale
3397,gossipcop-910052,958071753938493441,1,luis fonsi shares message fans taking home grammy well continue share culture pictwittercom2tedzojt8c
3866,politifact14742,654057478452326400,0,snapchat shutting video hub snap channel


In [150]:
complete_tweets_df['TextCleared'] = preprocess_text_for_column(complete_tweets_df,'TextCleared')


The default value of regex will change from True to False in a future version.


The default value of regex will change from True to False in a future version.



In [151]:
complete_tweets_df.drop_duplicates(subset=['TextCleared'],inplace=True)

In [152]:
complete_tweets_df.shape

(679798, 4)

In [154]:
complete_freqs_for_tweets_polit = create_dataframe_with_statistical_values_for_words(complete_tweets_df[complete_tweets_df['news_id'].str.startswith('polit')],'TextCleared')

In [159]:
px.bar(complete_freqs_for_tweets_polit[complete_freqs_for_tweets_polit['Total']>4500].sort_values(['Total','label'],ascending=False),x='Word',y='Frequency',color='label')

In [163]:
px.bar(complete_freqs_for_tweets_polit[complete_freqs_for_tweets_polit['Total']>4500].sort_values(['FrequencyNegative','label'],ascending=False),x='Word',y='FrequencyRelative',color='label')

In [164]:
complete_freqs_for_tweets_celeb = create_dataframe_with_statistical_values_for_words(complete_tweets_df[complete_tweets_df['news_id'].str.startswith('gos')],'TextCleared')

In [174]:
px.bar(complete_freqs_for_tweets_celeb[complete_freqs_for_tweets_celeb['Total']>500].sort_values(['Total','label'],ascending=False)[:200],x='Word',y='Frequency',color='label')

In [179]:
px.bar(complete_freqs_for_tweets_celeb[complete_freqs_for_tweets_celeb['Total']>2000].sort_values(['FrequencyNegative','label'],ascending=False)[:150],x='Word',y='FrequencyRelative',color='label')