# Data Cleaning

### Cleaning the quotes

Replace special quotes with normal ones and remove single quotes to prevent parsing errors.

In [1]:
from src.data_analysis.clean_dataset import cleanup_dataset
import pandas as pd
import string

In [2]:
# Relative path to the data directory. The trailing slash matters: later cells
# build file paths with "{}name.csv".format(data_folder).
data_folder = "../../data/"

In [3]:
# Run the project's cleanup helper on the training split
# (arguments are presumably input path, output path - confirm against the helper).
cleanup_dataset(data_folder + "train.csv", data_folder + "train_cleaned.csv")

In [4]:
# Run the project's cleanup helper on the dev split
# (arguments are presumably input path, output path - confirm against the helper).
cleanup_dataset(data_folder + "dev.csv", data_folder + "dev_cleaned.csv")

In [3]:
# Run the project's cleanup helper on the test split
# (arguments are presumably input path, output path - confirm against the helper).
cleanup_dataset(data_folder + "test.csv", data_folder + "test_cleaned.csv")

### Cleaning the labels

#### Train dataset

In [6]:
# The cleaned training file is read with header=None, so the column names are
# supplied explicitly here.
train = pd.read_csv(
    data_folder + "train_cleaned.csv",
    header=None,
    names=[
        "image_name",
        "Image_URL",
        "OCR_extracted_text",
        "Corrected_text",
        "Humour",
        "Sarcasm",
        "Offense",
        "Motivation",
        "Overall_sentiment",
        "Basis_of_classification",
    ],
)

In [16]:
def text_cleaning(row):
    """Return a normalised copy of a row's ``Corrected_text``.

    ASCII punctuation (``string.punctuation``) is removed, the text is
    lower-cased, and runs of whitespace are collapsed to single spaces.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``"Corrected_text"`` entry.

    Returns
    -------
    str or None
        The cleaned text, or ``None`` when the entry is missing (``None`` or
        NaN) or is an empty string.
    """
    x = row["Corrected_text"]
    # pd.isna handles both None and float NaN. The previous check called
    # math.isnan, but `math` is not imported in the notebook's import cell,
    # so a NaN value raised NameError. Missingness is tested before
    # truthiness because `not pd.NA` is ambiguous and raises.
    if pd.isna(x) or not x:
        return None
    cleaned = x.translate(str.maketrans('', '', string.punctuation))
    cleaned = " ".join(cleaned.lower().split())
    return cleaned

In [8]:
# Distinct humour labels in the training split.
train["Humour"].unique()

array(['hilarious', 'not_funny', 'very_funny', 'funny'], dtype=object)

In [9]:
# Distinct sarcasm labels in the training split.
train["Sarcasm"].unique()

array(['general', 'not_sarcastic', 'twisted_meaning', 'very_twisted'],
      dtype=object)

In [10]:
# Distinct offense labels in the training split.
train["Offense"].unique()

array(['not_offensive', 'very_offensive', 'slight', 'hateful_offensive'],
      dtype=object)

In [11]:
# Distinct motivation labels in the training split.
train["Motivation"].unique()

array(['not_motivational', 'motivational'], dtype=object)

In [12]:
# Distinct overall-sentiment labels in the training split.
train["Overall_sentiment"].unique()

array(['very_positive', 'positive', 'neutral', 'negative',
       'very_negative',
       'positivechandler_Friday-Mood-AF.-meme-Friends-ChandlerBing.jpg'],
      dtype=object)

In [13]:
# Show the row(s) carrying the malformed sentiment label.
train[train["Overall_sentiment"] == "positivechandler_Friday-Mood-AF.-meme-Friends-ChandlerBing.jpg"]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification
726,chandler_f50efbd3af8d0a93a2ecdead0dc5044a.jpg,https://i.pinimg.com/originals/f5/0e/fb/f50efb...,Chandler Trolling on someone else's account BE...,Chandler Trolling on someone else's account BE...,funny,not_sarcastic,not_offensive,not_motivational,positivechandler_Friday-Mood-AF.-meme-Friends-...,


In [14]:
# NOTE(review): this value looks like "positive" fused with an image file name,
# which suggests a lost line break in the raw CSV; any row swallowed that way
# is not recovered here - confirm against the source file.
train = train.replace(
    to_replace="positivechandler_Friday-Mood-AF.-meme-Friends-ChandlerBing.jpg",
    value="positive",
)

In [15]:
# Re-check the sentiment labels after replacing the malformed value.
train["Overall_sentiment"].unique()

array(['very_positive', 'positive', 'neutral', 'negative',
       'very_negative'], dtype=object)

In [16]:
# Distinct values of the classification-basis column in the training split.
train["Basis_of_classification"].unique()

array([nan])

#### Dev dataset

In [4]:
# The dev file is read with its own header row; list the column names it provides.
dev = pd.read_csv(data_folder + "dev_cleaned.csv")
dev.columns.values

array(['Image_name', 'Image_URL', 'OCR_extracted_text', 'corrected_text',
       'Humour', 'Sarcasm', 'offensive', 'Motivational',
       'Overall_Sentiment', 'Basis_of_classification'], dtype=object)

In [5]:
# Align the dev column names with the ones used for the training frame.
dev = dev.rename(
    columns={
        "corrected_text": "Corrected_text",
        "offensive": "Offense",
        "Motivational": "Motivation",
        "Overall_Sentiment": "Overall_sentiment",
        "Image_name": "image_name",
    }
)

In [6]:
# Distinct humour labels in the dev split.
dev["Humour"].unique()

array(['very_funny', 'funny', 'not_funny', 'hilarious'], dtype=object)

In [7]:
# Distinct sarcasm labels in the dev split.
dev["Sarcasm"].unique()

array(['general', 'not_sarcastic', 'twisted_meaning', 'very_twisted'],
      dtype=object)

In [8]:
# Distinct offense labels in the dev split.
dev["Offense"].unique()

array(['hateful_offensive', 'not_offensive', 'slight', 'very_offensive'],
      dtype=object)

In [9]:
# Distinct motivation labels in the dev split.
dev["Motivation"].unique()

array(['not_motivational', 'motivational'], dtype=object)

In [10]:
# Distinct overall-sentiment labels in the dev split.
dev["Overall_sentiment"].unique()

array(['very_positive', 'negative', 'neutral', 'positive',
       'very_negative'], dtype=object)

In [11]:
# Distinct values of the classification-basis column in the dev split.
dev["Basis_of_classification"].unique()

array(['image_and_text ', 'image', 'text', 'image_and_text  '],
      dtype=object)

#### Test dataset

In [12]:
# Skip the first line of the file and use the training-style column names instead.
test = pd.read_csv(
    data_folder + "test_cleaned.csv",
    skiprows=1,
    names=["image_name", "Image_URL", "OCR_extracted_text", "Corrected_text"],
)

In [13]:
# Confirm the column names assigned to the test frame.
test.columns.values

array(['image_name', 'Image_URL', 'OCR_extracted_text', 'Corrected_text'],
      dtype=object)

### Checking missing values

In [17]:
def replace_missing_text(x):
    """Pick the text to keep for one row.

    Returns the row's ``OCR_extracted_text`` when ``Corrected_text`` is a
    single space or is missing (as judged by ``pd.isna``); otherwise returns
    ``Corrected_text`` unchanged.
    """
    corrected = x["Corrected_text"]
    is_blank = corrected == ' ' or pd.isna(corrected)
    return x["OCR_extracted_text"] if is_blank else corrected

#### Train dataset

In [27]:
# Ignore the two columns that are allowed to be empty, then list every
# training row that still has a null somewhere.
to_check = train.drop(columns=["Basis_of_classification", "OCR_extracted_text"])
to_check[to_check.isna().any(axis=1)]

Unnamed: 0,image_name,Image_URL,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment
4214,trump_1ciwua.jpg,https://i.imgflip.com/1ciwua.jpg,,very_funny,general,slight,motivational,positive
4230,trump_85486890.jpg,https://cdn.ebaumsworld.com/mediaFiles/picture...,,very_funny,general,slight,not_motivational,neutral
4231,trump_1486350110-meme-5.png,https://sportsdaydfw.imgix.net/1486350110-meme...,,funny,twisted_meaning,very_offensive,motivational,negative
4261,trump_qeqrech7dx3z.jpg,https://i.redd.it/qeqrech7dx3z.jpg,,not_funny,general,slight,motivational,very_positive
4807,trump_d88.jpg,https://i.kym-cdn.com/photos/images/facebook/0...,,very_funny,general,slight,motivational,neutral
5285,minion_itm-about-as-ok-with-libs-fuckin-minion...,https://pics.conservativememes.com/itm-about-a...,,very_funny,general,slight,motivational,neutral
6789,trump_1485530548-donald-trump-and-hillary-clin...,https://im.indiatimes.in/content/itimes/photo/...,,very_funny,twisted_meaning,not_offensive,not_motivational,positive
6792,trump_amusing-memes.jpg,http://worldwideinterweb.com/wp-content/upload...,,hilarious,general,not_offensive,not_motivational,positive
6794,trump_clinton-vs-trump-memes.jpg,http://worldwideinterweb.com/wp-content/upload...,,not_funny,not_sarcastic,very_offensive,motivational,positive


In [28]:
# Training rows whose Corrected_text is null.
train.loc[train["Corrected_text"].isna()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification
4214,trump_1ciwua.jpg,https://i.imgflip.com/1ciwua.jpg,,,very_funny,general,slight,motivational,positive,
4230,trump_85486890.jpg,https://cdn.ebaumsworld.com/mediaFiles/picture...,,,very_funny,general,slight,not_motivational,neutral,
4231,trump_1486350110-meme-5.png,https://sportsdaydfw.imgix.net/1486350110-meme...,,,funny,twisted_meaning,very_offensive,motivational,negative,
4261,trump_qeqrech7dx3z.jpg,https://i.redd.it/qeqrech7dx3z.jpg,,,not_funny,general,slight,motivational,very_positive,
4807,trump_d88.jpg,https://i.kym-cdn.com/photos/images/facebook/0...,,,very_funny,general,slight,motivational,neutral,
5285,minion_itm-about-as-ok-with-libs-fuckin-minion...,https://pics.conservativememes.com/itm-about-a...,,,very_funny,general,slight,motivational,neutral,
6789,trump_1485530548-donald-trump-and-hillary-clin...,https://im.indiatimes.in/content/itimes/photo/...,,,very_funny,twisted_meaning,not_offensive,not_motivational,positive,
6792,trump_amusing-memes.jpg,http://worldwideinterweb.com/wp-content/upload...,,,hilarious,general,not_offensive,not_motivational,positive,
6794,trump_clinton-vs-trump-memes.jpg,http://worldwideinterweb.com/wp-content/upload...,,,not_funny,not_sarcastic,very_offensive,motivational,positive,


In [29]:
# Hand-transcribed text for the training rows whose Corrected_text is null,
# keyed by row label so each text sits next to the row it belongs to.
train_text_fixes = {
    4214: "It's rigged ! it's rigged ! it's a left wing-- no, a right wing-- no, a media conspiracy",
    4230: "Stage #1 - Action complete Stage #2 make it stick!",
    4231: "Breaking: Trump signs executive order giving Tom Brady and the new england patriots 41 points",
    4261: "When you login to your neighbors router using 1234",
    4807: "I'm gonna build some fancy walls even though I have millions of extra dollars in gonna make the mexicans pay for it",
    5285: "I'm about as OK with libs burnin' the flag as I am with how much it burns when I piss Fukin minion memes. ~L.",
    6789: "If Donald and Hillary are together on a boat in the middle of the ocean and it sinks. who survives ? AMERICA",
    6792: "Bruh why this tub of margarine look like Donald Trump ?",
    6794: "2016 election Trump vs Hillary still a better love story than Twilight",
}
train.loc[list(train_text_fixes), "Corrected_text"] = list(train_text_fixes.values())

In [30]:
# Re-check: this should list no rows once the manual texts are filled in.
train.loc[train["Corrected_text"].isna()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification


In [31]:
# Save the training frame with the missing texts filled in.
train.to_csv(data_folder + "train_cleaned_missing.csv", index=False)

#### Dev dataset

In [32]:
# Dev rows whose Corrected_text is null.
dev.loc[dev["Corrected_text"].isna()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification


In [34]:
# Fall back to the OCR text wherever the corrected text is blank or missing.
dev["Corrected_text"] = dev.apply(replace_missing_text, axis=1)

In [35]:
# Ignore the two columns that are allowed to be empty, then list every dev row
# that still has a null somewhere.
to_check = dev.drop(columns=["Basis_of_classification", "OCR_extracted_text"])
to_check[to_check.isna().any(axis=1)]

Unnamed: 0,image_name,Image_URL,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment


In [36]:
# Save the dev frame with the missing texts filled in.
dev.to_csv(data_folder + "dev_cleaned_missing.csv", index=False)

#### Test dataset

In [18]:
# Test rows whose Corrected_text is null.
test.loc[test["Corrected_text"].isna()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text
222,sports_yzpsp7txypndvzwfuzlb.jpg,https://images.complex.com/complex/image/uploa...,PLAYS A GAME OF ROCK-PAPER-SCISSORS @NBAMEMES ...,
271,trump_Trump-Memes-8.jpg,https://i2.wp.com/informedsharing.com/wp-conte...,,
348,misog_111jh6qu4wyg20204j16vkd.jpg,https://images.complex.com/complex/image/uploa...,MY HUSBAND DOES EVERYTHING I ASK BUT I HAVE TO...,
355,friends_thomas_freaked_out_meme_by_wildcat1999...,https://images-wixmp-ed30a86b8c4ca887773594c2....,THAT MOMENT WHEN YOU WISH YOU COULD UNSEE WHAT...,
513,friends_thomas_and_friends_meme_9_by_thethomag...,https://images-wixmp-ed30a86b8c4ca887773594c2....,WHAT THE HELL?,
623,trump_HellToupee1500-5ada42951d64040039145c86.jpg,https://www.lifewire.com/thmb/e7oqSr4QLiAetIg4...,THERE WILL BE HELL TOUPEE,
853,got_GoT_Meme_2.jpg,https://img-static.popxo.com/tr:w-600,Winter... is coming! fb.com/NCWEmmy #friends w...,
879,misog_257ulhmkmisicg8e7twizsr.jpg,https://images.complex.com/complex/image/uploa...,THERE WILL ONLY BE 7 PLANETS AFTER I DESTROY U...,
984,racis_179trumpracist.jpg,https://www.snopes.com/tachyon/2016/05/trumpra...,Donald Trump has been in the public eye for ov...,
1004,bethe_223Conspiracy-Ted1.jpg,https://media.wired.com/photos/5932705958b0d64...,WHAT IF 'BILL & TED'S' JUST CAME OUT TODAY,


In [19]:
# Fall back to the OCR text wherever the corrected text is blank or missing.
test["Corrected_text"] = test.apply(replace_missing_text, axis=1)

In [20]:
# Rows that are still null after the OCR fallback (both text columns empty).
test.loc[test["Corrected_text"].isna()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text
271,trump_Trump-Memes-8.jpg,https://i2.wp.com/informedsharing.com/wp-conte...,,
1815,trump_Melania-Trump-memes-784x441.jpg,https://st1.latestly.com/wp-content/uploads/20...,,


In [21]:
# Hand-entered text for the two test rows that are still null after the OCR
# fallback. The labels must match the null listing: the empty row is 271
# (trump_Trump-Memes-8.jpg). The previous label, 272, overwrote a neighbouring
# row's text and left row 271 as NaN. Row 1815 gets an empty string.
test.loc[[271, 1815], "Corrected_text"] = [
    "Do you have any history of mental illness in your family ? I have an uncle who is voting for Donald Trump",
    ""
]
# Guard against label typos like the one above: nothing may remain null.
assert test["Corrected_text"].notna().all(), "test rows with null Corrected_text remain"

In [22]:
# Save the test frame with the missing texts filled in.
test.to_csv(data_folder + "test_cleaned_missing.csv", index=False)

### Check for duplicates

#### Train dataset

In [42]:
# Training rows that exactly repeat an earlier row.
train.loc[train.duplicated()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification
1971,hillary_c3fd01300e5bee2ba12a45ee2f160ed11b8926...,http://www.quickmeme.com/img/c3/c3fd01300e5bee...,WANTS TO BAN VIOLENT VIDEO GAMES DOESN'T TRY T...,WANTS TO BAN VIOLENT VIDEO GAMES DOESN'T TRY T...,very_funny,general,slight,motivational,positive,


In [43]:
# Keep the first occurrence of each fully duplicated row.
# NOTE(review): train_cleaned_missing.csv was written before this step, so that
# file still contains the duplicate.
train = train.drop_duplicates(keep="first")

#### Dev dataset

In [44]:
# Dev rows that exactly repeat an earlier row.
dev.loc[dev.duplicated()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification


#### Test dataset

In [23]:
# Test rows that exactly repeat an earlier row.
test.loc[test.duplicated()]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text


### Remove URLs from text

In [24]:
# Matches URL-like tokens, ignoring case: optional scheme(s), optional "www.",
# a run of host characters, a dot, 1-6 suffix characters, then an optional
# path/query tail.
# NOTE(review): the scheme is optional, so any "word.word" token with no space
# after the dot also matches - confirm that removing such tokens is intended.
url_regex = r"(?i)(https?:\/\/)*(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)"
# Matches a trailing watermark credit ("imgflip", "memecenter", "we know meme(s)")
# plus any trailing whitespace at the very end of the string, ignoring case.
copyright_regex = r"(?i)(imgflip|memecenter|we\s*know\s*memes?)\s*$"

#### Train dataset

In [47]:
# Strip URL-like tokens, then a trailing watermark credit, then outer whitespace.
train["Corrected_text"] = (
    train["Corrected_text"]
    .replace(url_regex, '', regex=True)
    .replace(copyright_regex, '', regex=True)
    .str.strip()
)

In [48]:
# Training rows whose text became empty after the stripping step.
train.loc[train["Corrected_text"].eq("")]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification
5923,modi_f6548f1b161be16495c728b547055962.jpg,https://i.pinimg.com/originals/f6/54/8f/f6548f...,WhatsAppText.com I need 'Add Mitro' button THI...,,very_funny,not_sarcastic,not_offensive,not_motivational,positive,


In [49]:
import re

# Row 5923 lost all of its corrected text in the stripping step, so rebuild it
# from the OCR text with the same two patterns applied in the same order.
corrected_text = train.loc[5923, "OCR_extracted_text"]
for pattern in (url_regex, copyright_regex):
    corrected_text = re.sub(pattern, '', corrected_text, flags=re.IGNORECASE)
corrected_text = corrected_text.strip()
print(corrected_text)
train.loc[5923, "Corrected_text"] = corrected_text

I need 'Add Mitro' button THIS PICTURE WAS TWEETED BY @MEAIndia


In [50]:
# Save the URL-stripped training frame. The path is built from data_folder,
# like the other save cells, instead of repeating the directory literal, so
# moving the data directory only requires changing data_folder.
train.to_csv("{}train_url_cleaned.csv".format(data_folder), index=False)

#### Dev dataset

In [51]:
# Strip URL-like tokens, then a trailing watermark credit, then outer whitespace.
dev["Corrected_text"] = (
    dev["Corrected_text"]
    .replace(url_regex, '', regex=True)
    .replace(copyright_regex, '', regex=True)
    .str.strip()
)

In [52]:
# Dev rows whose text became empty after the stripping step.
dev.loc[dev["Corrected_text"].eq("")]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification


In [53]:
# Save the URL-stripped dev frame. The path is built from data_folder, like the
# other save cells, instead of repeating the directory literal, so moving the
# data directory only requires changing data_folder.
dev.to_csv("{}dev_url_cleaned.csv".format(data_folder), index=False)

#### Test dataset

In [25]:
# Strip URL-like tokens, then a trailing watermark credit, then outer whitespace.
test["Corrected_text"] = (
    test["Corrected_text"]
    .replace(url_regex, '', regex=True)
    .replace(copyright_regex, '', regex=True)
    .str.strip()
)

In [26]:
# Test rows whose text is empty after the stripping step.
test.loc[test["Corrected_text"].eq("")]

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text
1815,trump_Melania-Trump-memes-784x441.jpg,https://st1.latestly.com/wp-content/uploads/20...,,


In [27]:
# Save the final test frame. The path is built from data_folder, like the other
# save cells, instead of repeating the directory literal, so moving the data
# directory only requires changing data_folder.
test.to_csv("{}test_cleaned_final.csv".format(data_folder), index=False)

### Update overall sentiment

#### Train dataset

In [57]:
# Fold the "very_*" sentiment levels into their base class, in that column only.
train = train.replace(
    {
        'Overall_sentiment': {
            "very_negative": "negative",
            "very_positive": "positive",
        }
    }
)

In [58]:
# Sentiment labels left in the training split after collapsing.
train["Overall_sentiment"].unique()

array(['positive', 'neutral', 'negative'], dtype=object)

#### Dev dataset

In [59]:
# Fold the "very_*" sentiment levels into their base class, in that column only.
dev = dev.replace(
    {
        'Overall_sentiment': {
            "very_negative": "negative",
            "very_positive": "positive",
        }
    }
)

In [60]:
# Sentiment labels left in the dev split after collapsing.
dev["Overall_sentiment"].unique()

array(['positive', 'negative', 'neutral'], dtype=object)

### Add binary labels for humour, sarcasm, offense and motivation

#### Train dataset

In [61]:
# Each *_bin column is True for every label other than the "not_*" class.
train["Humour_bin"] = train["Humour"].ne("not_funny")
train["Sarcasm_bin"] = train["Sarcasm"].ne("not_sarcastic")
train["Offense_bin"] = train["Offense"].ne("not_offensive")
train["Motivation_bin"] = train["Motivation"].ne("not_motivational")

In [62]:
# Display the training frame to check the newly added *_bin columns.
train

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification,Humour_bin,Sarcasm_bin,Offense_bin,Motivation_bin
0,10_year_2r94rv.jpg,https://i.imgflip.com/2r94rv.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,hilarious,general,not_offensive,not_motivational,positive,,True,True,False,False
1,10_year_10-year-challenge_1547788782.jpeg,https://spiderimg.amarujala.com/assets/images/...,The best of #10 YearChallenge! Completed in le...,The best of #10 YearChallenge! Completed in le...,not_funny,general,not_offensive,motivational,positive,,False,True,False,True
2,10_year_10yearchallenge-5c75f8b946e0fb0001edc7...,https://www.lifewire.com/thmb/8wNfd94_meE9X2cp...,Sam Thorne @Strippin ( Follow Follow Saw every...,Sam Thorne @Strippin ( Follow Follow Saw every...,very_funny,not_sarcastic,not_offensive,not_motivational,positive,,True,False,False,False
3,10_year_10-year-challenge-sweet-dee-edition-40...,https://pics.conservativememes.com/10-year-cha...,10 Year Challenge - Sweet Dee Edition,10 Year Challenge - Sweet Dee Edition,very_funny,twisted_meaning,very_offensive,motivational,positive,,True,True,True,True
4,10_year_10-year-challenge-with-no-filter-47-hi...,https://pics.me.me/10-year-challenge-with-no-f...,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,hilarious,very_twisted,very_offensive,not_motivational,neutral,,True,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,best_2017_d2fe668a7ff9227fbb604927de025311.jpg,https://i.pinimg.com/345x/d2/fe/66/d2fe668a7ff...,Tuesday is Mardi Gras Wednesday is Valentine's...,Tuesday is Mardi Gras Wednesday is Valentine's...,very_funny,twisted_meaning,very_offensive,motivational,neutral,,True,True,True,True
6996,best_2017_DR9DnutV4AYK-Pc.jpg,https://pbs.twimg.com/media/DR9DnutV4AYK-Pc.jpg,MUST WATCH MOVIES OF 2017 ITI Chennai memes MA...,MUST WATCH MOVIES OF 2017 ITI Chennai memes MA...,funny,twisted_meaning,not_offensive,not_motivational,neutral,,True,True,False,False
6997,best_2017_less-more-talking-listening-planning...,https://pics.ballmemes.com/less-more-talking-l...,LESS MORE TALKING PLANNING SODA JUNK FOOD COMP...,LESS MORE TALKING PLANNING SODA JUNK FOOD COMP...,funny,general,slight,not_motivational,positive,,True,True,True,False
6998,best_2017_make-time.jpg,https://arhtisticlicense.files.wordpress.com/2...,When I VERY have time is a fantasy No one has ...,When I have time is a fantasy. no one has time...,not_funny,twisted_meaning,not_offensive,motivational,positive,,False,True,False,True


In [63]:
# Save the final training frame, including the binary label columns.
train.to_csv(data_folder + "train_cleaned_final.csv", index=False)

#### Dev dataset

In [64]:
# Each *_bin column is True for every label other than the "not_*" class.
dev["Humour_bin"] = dev["Humour"].ne("not_funny")
dev["Sarcasm_bin"] = dev["Sarcasm"].ne("not_sarcastic")
dev["Offense_bin"] = dev["Offense"].ne("not_offensive")
dev["Motivation_bin"] = dev["Motivation"].ne("not_motivational")

In [65]:
# Display the dev frame to check the newly added *_bin columns.
dev

Unnamed: 0,image_name,Image_URL,OCR_extracted_text,Corrected_text,Humour,Sarcasm,Offense,Motivation,Overall_sentiment,Basis_of_classification,Humour_bin,Sarcasm_bin,Offense_bin,Motivation_bin
0,skeptical_stealing-my-milk.jpg,http://www.relatably.com/m/img/funny-skeptical...,ISAW DAD WITH MOM LASTNIGHT I THINK HE WAS STE...,ISAW DAD WITH MOM LASTNIGHT I THINK HE WAS STE...,very_funny,general,hateful_offensive,not_motivational,positive,image_and_text,True,True,True,False
1,skeptical_breathing+if+she+has+my+nose.jpg,http://1.bp.blogspot.com/-g0E1yGCghCI/T68yJ1Oj...,HOW AM I STILL BREATHING IF SHE HAS MY NOSE? q...,HOW AM I STILL BREATHING IF SHE HAS MY NOSE?,funny,general,not_offensive,motivational,negative,image,True,True,False,True
2,skeptical_603b3553d88441537f6c65abac8a1cec.jpg,https://i.pinimg.com/736x/60/3b/35/603b3553d88...,YOU MEAN TO TELL ME BIGG BOSSIS BETTER THAN KB...,YOU MEAN TO TELL ME BIGG BOSSIS BETTER THAN KB...,not_funny,not_sarcastic,not_offensive,not_motivational,neutral,image_and_text,False,False,False,False
3,skeptical_e17ae5f069b21df5599460939047d4ae8db9...,http://www.quickmeme.com/img/e1/e17ae5f069b21d...,MY SHARE OF THE NATIONAL DEBT IS HOW MUCHP qui...,MY SHARE OF THE NATIONAL DEBT IS HOW MUCH?,funny,general,slight,not_motivational,positive,image_and_text,True,True,True,False
4,skeptical_75c34fa1-4d2b-45c1-9bda-5ff0f15d241e...,http://cdn.funnyisms.com/75c34fa1-4d2b-45c1-9b...,WAIT A SECOND BILLA AM I SKEPTICAL BABY GROWN ...,WAIT A SECOND BILLA AM I SKEPTICAL BABY GROWN ...,funny,twisted_meaning,slight,not_motivational,neutral,image_and_text,True,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,cat_U_367best-funny-cat-memes.jpg,https://cdn3-www.cattime.com/assets/uploads/ga...,I love you lamp Shut up,I love you lamp Shut up,hilarious,general,hateful_offensive,motivational,positive,image_and_text,True,True,True,True
996,cat_U_111curious_cat_happy_birthday_cat_meme1.jpg,https://www.wishesgreeting.com/wp-content/uplo...,I GOT YOU A PRESENT FOR YOUR BIRTHDAY IT'S IN ...,I GOT YOU A PRESENT FOR YOUR BIRTHDAY IT'S IN ...,funny,general,slight,motivational,positive,image,True,True,True,True
997,cat_U_260meme27.png,https://catsareontop.com/wp-content/uploads/20...,WHAT IF THE HUMAN IS NOT MY PET BUT I'M HIS,WHAT IF THE HUMAN IS NOT MY PET BUT I'M HIS,funny,general,very_offensive,not_motivational,positive,image_and_text,True,True,True,False
998,cat_U_314Evil-Cat-Memes-Photos.png,https://www.catbreedselector.com/wp-content/up...,WOW! NEW SOFA I'LL SHOW NO MERCY CatBreed Sele...,WOW! NEW SOFA I'LL SHOW NO MERCY CatBreed f C...,very_funny,twisted_meaning,very_offensive,not_motivational,positive,image_and_text,True,True,True,False


In [66]:
# Save the final dev frame, including the binary label columns.
dev.to_csv(data_folder + "dev_cleaned_final.csv", index=False)