In [79]:
import os
import pandas as pd
import numpy as np
import settings
# Show full comment text in rendered frames. None disables truncation; the old
# -1 sentinel is deprecated since pandas 1.0 and rejected by newer releases.
pd.set_option('display.max_colwidth', None)
import json
import re

In [80]:
# Load the raw training set from the data directory configured in settings.
df = pd.read_csv(os.path.join(settings.DATA_DIR, 'train.csv'))

In [81]:
# Eyeball ten random comments with their target score.
# NOTE(review): no random_state is passed, so the rows differ on every run.
df[['comment_text', 'target']].sample(10)

Unnamed: 0,comment_text,target
426675,"While we tend to think of Hiroshima and Nagasaki as singular events because of their significance as harbingers of the nuclear age, that is really a retrospective view. At the time they were just two more atrocities among many. The firebombing of Tokyo killed more people and that was not enough to make the Japanese stop.\n\nSo, the idea that Hiroshima and Nagasaki were necessary to shock the Japanese into surrendering is simply untrue. What shocked the Japanese into surrendering was that the Soviet Union entered the war and made mincemeat of their armies in Manchuria and threatened the home islands. They would much rather surrender to Truman than to Stalin.\n\nThe US knew all this, including the imminent entry of the Soviets into the war. But the momentum had already been built up to make use of the weapon before the war ended.\n\nNot sure if this counts as ""cowardly"". It was just the horrifying realpolitik that was going on in the dying days of WW2.",0.0
1023197,He had to pinch a loaf half way up. I hope they edit that out,0.0
222678,"Trump boasted, ""I Could Stand In the Middle Of Fifth Avenue And Shoot Somebody And I Wouldn't Lose Any Voters""\nNow he will probably boast,\nI could have sex with a married woman in the middle of Fifth Ave & I wouldn't lose any voters.\nProbably. DT can do anything & get away with it with a group of deplorables, and some RCs among 'em!",0.525
197248,This sounds like financial accounting 101 and a good investment. Borrow money at a lower interest rate than the rate of return on the money. Do it!,0.0
1493711,Why is it only lapdogs are the ones who constantly proclaim they are hearing dogma and dog whistles?\n\nVets4Trump,0.7
239974,It's better than a drought.,0.0
834931,There will come a day when my fat-azz will no longer fit in my 911... at which point I'll consider the Jag....or a mini van.,0.2
1618898,"Often these perverse destroyers of children get less jail time than thieves. And then of course we try to re-habilitate them and release them back on society, where enevitibly they return to their demented ways. When are we as a society going to step up and stop this ... Fu$! Three strikes, do something like this once and get locked up for life! A person is either evil enough to even have these sick twisted fantasies or they are not.\nPeriod",0.4
784074,"Give the financial industry a devalued pound, lower corporate tax rates and half the regulation and red tape that the EU has and they will stay in London forever.\n\nWhat you miss is that an UK unencumbered by stifling EU bureaucracy and red tape will be able to run circles around its competitors on the continent.",0.0
1043967,Surrey? LOL!,0.0


In [82]:
def remove_space(text: str, spaces: list, only_clean: bool = True):
    """
    Trim the text and collapse every run of whitespace into a single space.

    :param text: text to clean
    :param spaces: substrings (e.g. exotic space characters) to turn into a
        regular space; only consulted when ``only_clean`` is False
    :param only_clean: if True, only trim and collapse whitespace; if False,
        first replace every entry of ``spaces`` with a regular space
    :return: cleaned text
    """
    if not only_clean:
        for space in spaces:
            text = text.replace(space, ' ')

    text = text.strip()
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    text = re.sub(r'\s+', ' ', text)

    return text


def replace_words(text: str, mapping: dict):
    """
    Substitute every key of ``mapping`` that occurs in the text with its value.

    Entries are applied one after another in the mapping's iteration order,
    so a later entry can act on the result of an earlier one.

    :param text: text to clean
    :param mapping: dict of ``{substring: replacement}``
    :return: cleaned text
    """
    cleaned = text
    for source, target in mapping.items():
        if source not in cleaned:
            continue
        cleaned = cleaned.replace(source, target)

    return cleaned

def clean_number(text: str):
    """
    Normalise how numbers are written.

    Steps, applied in this order:

    1. split a digit run from a letter that directly follows it ("5kg" -> "5 kg");
    2. re-attach an ordinal suffix separated by step 1 ("100 th " -> "100th ");
    3. drop a comma between two digit runs ("1,000" -> "1000");
    4. drop a comma directly after a digit run ("2010, ok" -> "2010 ok");
    5. replace an "e" sitting between two digit runs with a space.

    :param text: text to clean
    :return: cleaned text
    """
    # Replacement templates are raw strings: '\g' inside a plain literal is an
    # invalid escape sequence.
    text = re.sub(r'(\d+)([a-zA-Z])', r'\g<1> \g<2>', text)
    text = re.sub(r'(\d+) (th|st|nd|rd) ', r'\g<1>\g<2> ', text)
    text = re.sub(r'(\d+),(\d+)', r'\g<1>\g<2>', text)
    text = re.sub(r'(\d+),', r'\g<1>', text)
    # NOTE(review): a plain "1e5" never reaches this rule intact, because step 1
    # has already rewritten it to "1 e5". Confirm whether this rule was meant
    # to run before step 1.
    text = re.sub(r'(\d+)(e)(\d+)', r'\g<1> \g<3>', text)

    return text


def spacing_punctuation(text: str, punctuation: str):
    """
    Surround every occurrence of the given symbols with single spaces.

    :param text: text to clean
    :param punctuation: string whose characters are the symbols to pad
    :return: cleaned text
    """
    padded = text
    for symbol in punctuation:
        if symbol not in padded:
            continue
        padded = padded.replace(symbol, ' ' + symbol + ' ')

    return padded

In [83]:
def fixing_with_regex(text):
    """
    Additional regex-based fixing of words.

    In order: normalises question words that have one stray character glued to
    them, replaces hyphens and periods with spaces, strips control characters,
    collapses whitespace, expands common contractions and splits "how" from a
    fixed list of words it is found glued to.

    :param text: text to clean
    :return: cleaned text
    """

    # NOTE(review): these literals are not raw strings, so every '\b' in them is
    # a backspace character (0x08), not a regex word boundary. As written, the
    # compiled pattern only fires on text that contains literal backspaces.
    # The literals are kept unchanged on purpose: simply prefixing them with
    # ``r`` would start splitting ordinary words ("show" -> "s how",
    # "goodwill" -> "good will"). Decide the intended behaviour before editing.
    mis_connect_list = ['\b(W|w)hat\b', '\b(W|w)hy\b', '(H|h)ow\b', '(W|w)hich\b', '(W|w)here\b', '(W|w)ill\b']
    mis_connect_re = re.compile('(%s)' % '|'.join(mis_connect_list))

    # Space-delimited spellings such as "whatsapp" / "Whatsap" -> "WhatsApp".
    # [Aa] replaces the original [A|a], which also accepted a literal '|'.
    text = re.sub(r" (W|w)hat+(s)*[Aa]*(p)+ ", " WhatsApp ", text)

    # A question word with exactly one extra non-space character before or
    # after it (e.g. " what? ") is replaced by the capitalised bare word.
    text = re.sub(r" (W|w)hat\S ", " What ", text)
    text = re.sub(r" \S(W|w)hat ", " What ", text)
    text = re.sub(r" (W|w)hy\S ", " Why ", text)
    text = re.sub(r" \S(W|w)hy ", " Why ", text)
    text = re.sub(r" (H|h)ow\S ", " How ", text)
    text = re.sub(r" \S(H|h)ow ", " How ", text)
    text = re.sub(r" (W|w)hich\S ", " Which ", text)
    text = re.sub(r" \S(W|w)hich ", " Which ", text)
    text = re.sub(r" (W|w)here\S ", " Where ", text)
    text = re.sub(r" \S(W|w)here ", " Where ", text)
    text = mis_connect_re.sub(r" \1 ", text)
    text = text.replace("What sApp", ' WhatsApp ')

    # Clean repeated letters.
    # NOTE(review): this also rewrites legitimate words such as "skiing".
    text = re.sub(r"(I|i)(I|i)+ng", "ing", text)

    # Every run of hyphens or periods becomes one space. After this line the
    # text contains no '-' or '.', so the original second pass over the same
    # characters could never match and has been dropped.
    text = re.sub(r"(-+|\.+)", " ", text)

    # Strip C0/C1 control characters and the soft hyphen.
    text = re.sub(r'[\x00-\x1f\x7f-\x9f\xad]', '', text)
    text = re.sub(r'(\d+)(e)(\d+)', r'\g<1> \g<3>', text)  # is a dup from above cell...
    text = re.sub(r"\s\s+", " ", text)
    text = re.sub(r'ᴵ+', '', text)

    # Contractions; each pattern requires a trailing space, so a contraction
    # at the very end of the text is left as it is.
    text = re.sub(r"(W|w)on(\'|\’)t ", "will not ", text)
    text = re.sub(r"(C|c)an(\'|\’)t ", "can not ", text)
    text = re.sub(r"(Y|y)(\'|\’)all ", "you all ", text)
    text = re.sub(r"(Y|y)a(\'|\’)ll ", "you all ", text)

    text = re.sub(r"(I|i)(\'|\’)m ", "i am ", text)
    text = re.sub(r"(A|a)in(\'|\’)t ", "is not ", text)
    text = re.sub(r"n(\'|\’)t ", " not ", text)
    text = re.sub(r"(\'|\’)re ", " are ", text)
    #text = re.sub(r"(\'|\’)s ", " is ", text)
    text = re.sub(r"(\'|\’)d ", " would ", text)
    text = re.sub(r"(\'|\’)ll ", " will ", text)
    text = re.sub(r"(\'|\’)t ", " not ", text)
    text = re.sub(r"(\'|\’)ve ", " have ", text)

    # Split "how" from the listed words it is glued to ("somehow" -> "some how").
    # The replacement template is a raw string: '\g' is an invalid escape otherwise.
    text = re.sub(
        r'(by|been|and|are|for|it|TV|already|justhow|some|had|is|will|would|should|shall|must|can|his|here|there|them|these|their|has|have|the|be|that|not|was|he|just|they|who)(how)',
        r'\g<1> \g<2>', text)

    return text


In [84]:
def load_preprocessing_data(path: str = 'mapping_dict.json'):
    """
    Loads dict with various mappings and strings for cleaning.

    The JSON file must contain the keys ``contraction_mapping``,
    ``mispell_dict``, ``special_punc_mappings``, ``rare_words_mapping``,
    ``bad_case_words``, ``mis_spell_mapping``, ``spaces`` and ``punctuation``.

    :param path: location of the JSON file; defaults to ``mapping_dict.json``
        in the current working directory
    :return: dict with three keys: ``spaces`` and ``punctuation`` copied from
        the file, and ``words_to_replace`` holding the six word mappings
        merged into one dict
    :raises KeyError: if one of the expected sections is missing
    """
    # The mappings contain non-ASCII keys (curly apostrophes, accented
    # letters), so do not rely on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        raw_mapping = json.load(f)

    # combine several dicts into one; on duplicate keys the later section wins
    merge_order = ('contraction_mapping',
                   'mispell_dict',
                   'special_punc_mappings',
                   'rare_words_mapping',
                   'bad_case_words',
                   'mis_spell_mapping')
    replace_dict = {}
    for section in merge_order:
        replace_dict.update(raw_mapping[section])

    return {'spaces': raw_mapping['spaces'],
            'punctuation': raw_mapping['punctuation'],
            'words_to_replace': replace_dict}

In [16]:
import swifter



In [85]:
def preprocess(text: str):
    """
    Run the whole cleaning pipeline on a single comment.

    Reads the module-level ``mapping_dict`` (keys ``spaces`` and
    ``words_to_replace``), which must exist by the time this is called.

    :param text: text to clean.
    :return: cleaned text
    """
    spaces = mapping_dict['spaces']
    replacements = mapping_dict['words_to_replace']

    cleaned = remove_space(text, spaces, only_clean=False)
    cleaned = clean_number(cleaned)
    cleaned = fixing_with_regex(cleaned)
    cleaned = replace_words(cleaned, replacements)

    # spacing_punctuation() and the blanket replacement of "/-'" by spaces
    # are not applied in this pipeline.

    # Second numeric pass and a final whitespace tidy-up after the rewrites.
    cleaned = clean_number(cleaned)
    return remove_space(cleaned, spaces)




In [86]:
# Module-level lookup tables read by preprocess() at call time.
mapping_dict = load_preprocessing_data()

In [87]:
mapping_dict.keys()

dict_keys(['spaces', 'punctuation', 'words_to_replace'])

In [None]:
#mapping_dict['punctuation']="/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&'

In [88]:
# Load the unmerged JSON for inspection. The file holds non-ASCII keys, so the
# encoding is given explicitly and does not depend on the platform default.
with open('mapping_dict.json', 'r', encoding='utf-8') as f:
    raw_map = json.load(f)

In [89]:
raw_map.keys()

dict_keys(['contraction_mapping', 'mispell_dict', 'special_punc_mappings', 'spaces', 'rare_words_mapping', 'bad_case_words', 'punctuation', 'mis_spell_mapping'])

In [110]:
raw_map['rare_words_mapping']

{' s.p ': ' ',
 ' S.P ': ' ',
 'U.s.p': '',
 'U.S.A.': 'USA',
 'u.s.a.': 'USA',
 'U.S.A': 'USA',
 'u.s.a': 'USA',
 'U.S.': 'USA',
 'u.s.': 'USA',
 ' U.S ': ' USA ',
 ' u.s ': ' USA ',
 'U.s.': 'USA',
 ' U.s ': 'USA',
 ' u.S ': ' USA ',
 'fu.k': 'fuck',
 'U.K.': 'UK',
 ' u.k ': ' UK ',
 ' don t ': ' do not ',
 'bacteries': 'batteries',
 ' yr old ': ' years old ',
 'Ph.D': 'PhD',
 'cau.sing': 'causing',
 'Kim Jong-Un': 'The president of North Korea',
 'savegely': 'savagely',
 'Ra apist': 'Rapist',
 '2fifth': 'twenty fifth',
 '2third': 'twenty third',
 '2nineth': 'twenty nineth',
 '2fourth': 'twenty fourth',
 '#metoo': 'MeToo',
 'Trumpcare': 'Trump health care system',
 '4fifth': 'forty fifth',
 'Remainers': 'remainder',
 'Terroristan': 'terrorist',
 'antibrahmin': 'anti brahmin',
 'fuckboys': 'fuckboy',
 'Fuckboys': 'fuckboy',
 'Fuckboy': 'fuckboy',
 'fuckgirls': 'fuck girls',
 'fuckgirl': 'fuck girl',
 'Trumpsters': 'Trump supporters',
 '4sixth': 'forty sixth',
 'culturr': 'culture',
 '

In [78]:
raw_map['bad_case_words']

{'nationalpost': 'national post',
 'businessinsider': 'business insider',
 'jewprofits': 'jew profits',
 'QMAS': 'Quality Migrant Admission Scheme',
 'casterating': 'castrating',
 'Kashmiristan': 'Kashmir',
 'CareOnGo': 'India first and largest Online distributor of medicines',
 'Setya Novanto': 'a former Indonesian politician',
 'TestoUltra': 'male sexual enhancement supplement',
 'rammayana': 'ramayana',
 'Badaganadu': 'Brahmin community that mainly reside in Karnataka',
 'bitcjes': 'bitches',
 'mastubrate': 'masturbate',
 'Français': 'France',
 'Adsresses': 'address',
 'flemmings': 'flemming',
 'intermate': 'inter mating',
 'feminisam': 'feminism',
 'cuckholdry': 'cuckold',
 'Niggor': 'black hip-hop and electronic artist',
 'narcsissist': 'narcissist',
 'Genderfluid': 'Gender fluid',
 ' Im ': ' I am ',
 ' dont ': ' do not ',
 'Qoura': 'Quora',
 'ethethnicitesnicites': 'ethnicity',
 'Namit Bathla': 'Content Writer',
 'What sApp': 'WhatsApp',
 'Führer': 'Fuhrer',
 'covfefe': 'coverage

In [58]:
mapping_dict['words_to_replace']

{"Trump's": 'trump is',
 "'cause": 'because',
 ',cause': 'because',
 ';cause': 'because',
 "ain't": 'am not',
 'ain,t': 'am not',
 'ain;t': 'am not',
 'ain´t': 'am not',
 'ain’t': 'am not',
 "aren't": 'are not',
 'aren,t': 'are not',
 'aren;t': 'are not',
 'aren´t': 'are not',
 'aren’t': 'are not',
 "can't": 'cannot',
 "can't've": 'cannot have',
 'can,t': 'cannot',
 'can,t,ve': 'cannot have',
 'can;t': 'cannot',
 'can;t;ve': 'cannot have',
 'can´t': 'cannot',
 'can´t´ve': 'cannot have',
 'can’t': 'cannot',
 'can’t’ve': 'cannot have',
 "could've": 'could have',
 'could,ve': 'could have',
 'could;ve': 'could have',
 "couldn't": 'could not',
 "couldn't've": 'could not have',
 'couldn,t': 'could not',
 'couldn,t,ve': 'could not have',
 'couldn;t': 'could not',
 'couldn;t;ve': 'could not have',
 'couldn´t': 'could not',
 'couldn´t´ve': 'could not have',
 'couldn’t': 'could not',
 'couldn’t’ve': 'could not have',
 'could´ve': 'could have',
 'could’ve': 'could have',
 "didn't": 'did not',
 'd

In [90]:
from pandarallel import pandarallel

In [30]:
def text_clean_wrapper_swifter(df):
    """
    Clean the ``comment_text`` column with swifter and return the result.

    The input frame is left untouched: the cleaned column is placed on a new
    frame, so the caller keeps the raw comments for comparison.

    :param df: frame with a ``comment_text`` column
    :return: new frame, same columns, with ``comment_text`` cleaned and cast to str
    """
    cleaned = df["comment_text"].swifter.apply(preprocess).astype(str)
    # assign() returns a new DataFrame, so the caller's column is not overwritten.
    return df.assign(comment_text=cleaned)

In [91]:
# Start the pandarallel workers that back Series.parallel_apply.
pandarallel.initialize()
def text_clean_wrapper(df):
    """
    Clean the ``comment_text`` column in parallel and return the result.

    The input frame is left untouched: the cleaned column is placed on a new
    frame, so the caller keeps the raw comments for comparison.

    :param df: frame with a ``comment_text`` column
    :return: new frame, same columns, with ``comment_text`` cleaned and cast to str
    """
    cleaned = df["comment_text"].parallel_apply(preprocess).astype(str)
    # assign() returns a new DataFrame, so the caller's column is not overwritten.
    return df.assign(comment_text=cleaned)

New pandarallel memory created - Size: 2000 MB
Pandarallel will run on 24 workers


In [92]:
# Run the cleaning pipeline over comment_text for the whole training frame.
df_cleaned = text_clean_wrapper(df)

In [93]:
# Re-read the CSV so `df` holds the original, uncleaned comments for the
# side-by-side comparison below.
df = pd.read_csv(os.path.join(settings.DATA_DIR, 'train.csv'))

In [94]:
df.head()

Unnamed: 0,id,target,comment_text,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,...,article_id,rating,funny,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count
0,59848,0.0,"This is so cool. It's like, 'would you want your mother to read this??' Really great idea, well done!",0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
1,59849,0.0,"Thank you!! This would make my life a lot less anxiety-inducing. Keep it up, and don't let anyone get in your way!",0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
2,59852,0.0,This is such an urgent design problem; kudos to you for taking it on. Very impressive!,0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
3,59855,0.0,Is this something I'll be able to install on my site? When will you be releasing it?,0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
4,59856,0.893617,haha you guys are a bunch of losers.,0.021277,0.0,0.021277,0.87234,0.0,0.0,0.0,...,2006,rejected,0,0,0,1,0,0.0,4,47


In [95]:
# Put the cleaned text next to the raw text; the assignment aligns on index.
df['cleaned'] = df_cleaned.comment_text

In [96]:
# Compare raw and cleaned text for ten random rows with target >= 0.5.
# NOTE(review): no random_state is passed, so the rows differ on every run.
df[df['target']>=0.5][['comment_text', 'cleaned']].sample(10)

Unnamed: 0,comment_text,cleaned
875690,"This proposal is so incredibly stupid it's astounding that it has progressed this far.\n\nOK, so all the natural gas going to Vancouver ultimately gets shut off. What will replace it? Electricity. And who said there is no need for the power that site C will produce? Oh right, we can buy power from the US, putting ourselves more at the mercy of the orange buffoon.\n\nThe real buffoon in Vancouver is Gregor Robertson.","This proposal is so incredibly stupid it is astounding that it has progressed this far OK , so all the natural gas going to Vancouver ultimately gets shut off What will replace it ? Electricity And who said there is no need for the power that site C will produce ? Oh right , we can buy power from the US , putting ourselves more at the mercy of the orange buffoon The real buffoon in Vancouver is Gregor Robertson"
362958,It's Trump's version of whack-a-mole - one grotesque candidate for his administration disappears and another equally awful one appears instantaneously.,it is trump is version of whack a mole one grotesque candidate for his administration disappears and another equally awful one appears instantaneously
413885,"So Netayahoo, no sic intended, thought he could continue poking sticks in Obama's eye with no consequence so he got what he deserved on this one. Israelis should be angry with him over such inane moves. Bibi has no more interest in negotiating a peace deal than taking a moderate approach on anything. He simply is not worthy of any trust whatsoever.\nI have little sympathy for any side in the conflict, but under the circumstances admit I'd likely be just as intransigent if I was one of them.","So Netayahoo , no sic intended , thought he could continue poking sticks in Obama ' s eye with no consequence so he got what he deserved on this one Israelis should be angry with him over such inane moves Bibi has no more interest in negotiating a peace deal than taking a moderate approach on anything He simply is not worthy of any trust whatsoever I have little sympathy for any side in the conflict , but under the circumstances admit I would likely be just as intransigent if I was one of them"
1668626,Democrats are a bunch of hypocrites. They say one thing and do the opposite. Shame.,Democrats are a bunch of hypocrites They say one thing and do the opposite Shame
11645,"Good article Mr. Dingman. The legislative system in Alaska is badly broken. Simply changing the players is not the total solution if they don't follow their own procedural rules and instead continue to favor a standing caucus based on ""traditions"" instead of those written rules. \n This standing caucus system has created a ""legislature within the legislature"". Anyone from any party can join as long as they ""behave"". \n Voters must ask these candidates if they will join a standing caucus that demands ""lock step"" voting for a final budget regardless of sustainability and to follow leadership on all procedural votes used to quash suggestions from non-caucus members. Also will the candidate support the current legislative exemption from state open meeting law? \nFinally, great photo of the clown. I particularly enjoy the irony that the clown in the photo is not smiling which means the photo could actually be a legislator on the way to the capital.","Good article Mr Dingman The legislative system in Alaska is badly broken Simply changing the players is not the total solution if they do not follow their own procedural rules and instead continue to favor a standing caucus based on "" traditions "" instead of those written rules This standing caucus system has created a "" legislature within the legislature "" Anyone from any party can join as long as they "" behave "" Voters must ask these candidates if they will join a standing caucus that demands "" lock step "" voting for a final budget regardless of sustainability and to follow leadership on all procedural votes used to quash suggestions from non caucus members Also will the candidate support the current legislative exemption from state open meeting law ? Finally , great photo of the clown I particularly enjoy the irony that the clown in the photo is not smiling which means the photo could actually be a legislator on the way to the capital"
1133837,"Your attempt at copying my account isn't about to be successful.\nYou're a fraud.\nFlagging all your posts, including the fake accounts where you try to defend your own racism.\nYou should be banned.","Your attempt at copying my account is not about to be successful You are a fraud Flagging all your posts , including the fake accounts where you try to defend your own racism You should be banned"
1385965,"About the only thing this clown is good at, photo ops and parades.","About the only thing this clown is good at , photo ops and parades"
1211817,"There are many sources for hamburgers, and sugary frozen drinks in this state. There is only ONE provider in Anchorage for CABLE internet. Due to the high capital expense to establish a cable network, there is a 'high barrier to entry' so it is unlikely there will be any competition soon, especially given GCI's size and power, and ability to drive any competitor out of the market.\n\nPlease, your argument is tired and intellectually dishonest. \n\nYou must work for GCI, to post such a stupid comment. Either that, or you're a Republican.","There are many sources for hamburgers , and sugary frozen drinks in this state There is only ONE provider in Anchorage for CABLE internet Due to the high capital expense to establish a cable network , there is a ' high barrier to entry ' so it is unlikely there will be any competition soon , especially given GCI ' s size and power , and ability to drive any competitor out of the market Please , your argument is tired and intellectually dishonest You must work for GCI , to post such a stupid comment Either that , or you are a Republican"
1204180,"I agree, even if I'd state it a bit differently. Any time Kiz announces what ""Broncos Country"" will do or say, it's moronic. In fact, I find the whole thing patronizing. \n\nI won't melt down either, but it's scary times. In fact, this article lays out a series of things I didn't realize, including the fact that Ellis is one of 3 trustees who get to vote on which ""kid"" has proven they can serve as owner. The scary part is we're almost a year beyond the original deadline and the NFL can get impatient with stalled team sales.\n\nhttp://www.denverpost.com/2014/09/20/wholl-be-given-the-reins-of-broncos-ownership/","I agree , even if I would state it a bit differently Any time Kiz announces what "" Broncos Country "" will do or say , it is moronic In fact , I find the whole thing patronizing I will not melt down either , but it is scary times In fact , this article lays out a series of things I did not realize , including the fact that Ellis is one of 3 trustees who get to vote on which "" kid "" has proven they can serve as owner The scary part is we are almost a year beyond the original deadline and the NFL can get impatient with stalled team sales http : / / www denverpost com / 2014 / 09 / 20 / wholl be given the reins of broncos ownership /"
587577,Kiss those Federal funds...GOODBYE. LMAO. Feel sorry for the solid citizens of Jefferson County that they have such utterly stupid people running the county.,Kiss those Federal funds GOODBYE LMAO Feel sorry for the solid citizens of Jefferson County that they have such utterly stupid people running the county


In [97]:
# Persist the cleaned training frame (with header, without the index column).
df_cleaned.to_csv(os.path.join(settings.DATA_DIR, 'train_clean.csv'), header=True, index=False)

In [100]:
# Load the raw test set from the same data directory.
df_test = pd.read_csv(os.path.join(settings.DATA_DIR, 'test.csv'))

In [101]:
df_test.head()

Unnamed: 0,id,comment_text
0,7000000,Jeff Sessions is another one of Trump's Orwellian choices. He believes and has believed his entire career the exact opposite of what the position requires.
1,7000001,"I actually inspected the infrastructure on Grand Chief Stewart Philip's home Penticton First Nation in both 2010 and 2013. Exactly Zero projects that had been identified in previous inspection reports had been funded by the federal government, and the entire band was housed in ATCO trailers. Clearly the Harper Conservatives had already reduced the cash his band was sent to zero."
2,7000002,"No it won't . That's just wishful thinking on democrats fault . For the 100 th time , Walker cited the cost of drug users treatment as being lost with Obamacare . I laugh every time I hear a liberal claim republicans want to hurt people , and that's why they dumped Obamacare."
3,7000003,"Instead of wringing our hands and nibbling the periphery of the issue, how about we face the actual issue head on? I would support a city ordinance against loitering, and applaud city councilors who champion a real and permanent solution.\n\nThe details could be determined, but would include a limit to persons sitting, standing, lying, smoking, conversing over a certain amount of time, perhaps 10 minutes. Exceptions, of course, would be for shopping, dining, attending a licensed and approved event, etc.\n\nIt is noble to provide some services for the truly needy in our community, but that, in my estimation, is a separate issue. I do not wish for my city to provide for nor encourage idle and harassing behaviors in our city core.\n\nEnough is enough!"
4,7000004,"how many of you commenters have garbage piled high in your yard, bald tires, dead batteries, rotten pallets, car parts, blah blah blah. this town is a pigpen. drive around and look for yourself, its pathetic."


In [102]:
# Apply the same cleaning pipeline to the test comments.
df_test_clean = text_clean_wrapper(df_test)

In [105]:
# Spot-check ten random cleaned test rows (no random_state, so not repeatable).
df_test_clean.sample(10)

Unnamed: 0,id,comment_text
69701,7069701,A significant issue for aging seniors is mobility Simple tasks like shopping or visiting the dentist are made much more difficult if one cannot drive I can see how this would allow people to continue to live in their homes rather than move into assisted living or other accommodations
96704,7096704,and another fantasy yet again based on What nothing in the real world that is for sure
29916,7029916,"Margaret Wente and Rex Murphy quotes , prog ? Really ? Not very convincing , i am sorry"
44202,7044202,"The research supports the conclusion that people can learn all their lives In addition , anyone with a faith life who does not believe in the power of God ' s love to change them may want to give themselves more time to think about these things"
21668,7021668,"Wow , interesting that my property assessment has gone up just short of $ 40000 over the last 3 years and now that idiot of a mayor of ours does not want to give it back Instead , he will blow it on some of his pet liberal projects"
39586,7039586,"And for those of us who do pay our taxes , we would like to see Donald pays his fair share of taxes too It is not enough to hear a natural born liar say , "" I have paid his taxes "" Is he lying ? Probably But with Special Procecutor Mueller snooping around , this is what I have to say : Trump , get your story straight with the KGB ' s VEB bank , so I suggest you meet with Putin , immediately lol"
46288,7046288,"https : / / www cangea ca / bc geothermal resource estimate maps html most of the province , except the central interior The existing legacy dams are not going anywhere and can provide all necessary standby power Bio mass is not dirtier than burning coal , and we have a bio mass buildup problem in BC You might have noticed a few fires this past summer"
71260,7071260,"Since you mentioned stupidity it is "" there "" not "" they are """
79771,7079771,"Fuckin , this guy is a terrible person What he does with the riches that he inherited is that he buys low income apartments in other towns and then jacks the rent up on working poor people Since Darrel Dickson bought my apartment complex 6 years ago the rent has gone up $ 600 with absolutely no tangible improvements People are moving in with each other and ending up on the streets as a result This slumlords buys these GOVERNMENT SUBSIDIZED PROPERTIES that were made in order to give poor people a chance in a place where the housing market is getting utterly destroyed by greed If all of the rich people in Western Washington still want people to serve their coffee , take care of their elderly , landscape their cities and do all of the crummy jobs that they do not want to do then they need to stop taking advantage of a predatory housing market and stop bilking the poor for all they have Fuck you Darrel Dickson You are a bad person and will be going to hell for your greed"
76919,7076919,"I have a friend who lives in Sweden He would disagree wholeheartedly He says attacks occur daily Not major terrorist attacks but several small pockets of rapes , beatings , and mugging / robbery All from economic migrants Rapes and sexual assaults have exploded Not sure why we do not get minimal coverage of this over here but I can certainly lay a few guesses as to Why A terrorist attack does not have to be a huge attack to be considered terror You knew that though , right ?"


In [106]:
# Persist the cleaned test frame (with header, without the index column).
df_test_clean.to_csv(os.path.join(settings.DATA_DIR, 'test_clean.csv'), header=True, index=False)