### Step 1 - Data Cleaning

Open all the recipes data in one file and prepare them for further analysis!

In [2]:
# Import the necessary library!
import pandas as pd
import os
import numpy as np
import glob
import datetime

import nltk
import re
import string
from nltk.tokenize import RegexpTokenizer  
from nltk.corpus import stopwords  
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
print('package ready')

package ready


In [3]:
pd.set_option('display.max_rows', 100) # specifies number of rows to show
# specifies default number format to 4 decimal places
pd.options.display.float_format = '{:40,.2f}'.format 
pd.options.display.max_colwidth 
pd.options.display.max_colwidth = 500

In [4]:
# List all the files in the folder!  
files = glob.glob('guardian/*.csv')
# read the files in a dataframe  
data_df = [pd.read_csv(f) for f in files]

In [5]:
# Append all the files together  
data = pd.concat(data_df,ignore_index=True)
# Sort by date and Reset the index  
data = data.sort_values(['date']).reset_index()  
# Drop the duplicates  
data = data.drop_duplicates(['date','headline_list','author'])

In [6]:
print("Number of data (recipes) : " + str(data.shape))
print()
data.tail(2)

Number of data (recipes) : (7464, 11)



Unnamed: 0.1,index,Unnamed: 0,date,headline_list,category,author,subtitle,article_content,comments_number,share_number,url
7613,2263,65.0,META,,,"[<meta content=""the Guardian"" name=""author""/>]",,No Content,No Comments,No Sharing,https://www.theguardian.com/lifeandstyle/video/2011/jan/16/miso-soup-recipe-video
7615,1613,160.0,META,,,"[<meta content=""Ruby Tandoh"" name=""author""/>]",,No Content,No Comments,No Sharing,https://www.theguardian.com/lifeandstyle/ng-interactive/2015/oct/16/ruby-tandohs-butternut-squash-and-spinach-casserole-with-herbed-feta-crumble


In [7]:
# Drop the column that we don't need
data = data.drop(columns=['Unnamed: 0','index'])
data = data.drop_duplicates(['date','headline_list'],keep='first')

In [8]:
# Look for null value in the data
print(data.isnull().sum())

date                 0
headline_list        1
category             8
author             324
subtitle           187
article_content      1
comments_number      3
share_number         3
url                  2
dtype: int64


In [9]:
# Make a bit of cleaning
# Delete all the article with category 'Family Life' - Not relevant for recipes analysis! 
data = data[data['category'] != 'Family life']

In [None]:
# Check at the name of the author
#for i in data.author:
  #  print(i)

In [10]:
# Replace the author that have been wrongly webscraped!  
data = data.replace({'[<meta content="Dale Berning Sawa" name="author"/>]':'Dale Berning Sawa',  
                     '[<meta content="Chi-chi Nwanoku" name="author"/>]':'Chi-chi Nwanoku',  
                     '[<meta content="Hugh Fearnley-Whittingstall" name="author"/>]':'Hugh Fearnley-Whittingstall',  
                     '[<meta content="Jeanette Winterson" name="author"/>]':'Jeanette Winterson',  
                     '[<meta content="Matthew Fort" name="author"/>]':'Matthew Fort',  
                     '[<meta content="Susan McCarthy" name="author"/>]':'Susan McCarthy',  
                     '[<meta content="Ariane Sherine" name="author"/>]':'Ariane Sherine',  
                     '[<meta content="Ruby Tandoh" name="author"/>]':'Ruby Tandoh',  
                     '[<meta content="the Guardian" name="author"/>]':'the Guardian',  
                     '[<meta content="Romy Ash" name="author"/>, <meta content="Sarah Trotter" name="author"/>, <meta content="Romy Ash" name="author"/>, <meta content="Lauren Bamford" name="author"/>, <meta content="Sarah Trotter" name="author"/>]':'Romy Ash, Sara Trotter, Lauren Bamford',  
                     '[<meta content="Clem Bastow" name="author"/>]':'Clem Bastow',  
                     '[<meta content="Yotam Ottolenghi" name="author"/>, <meta content="Uyen Luu" name="author"/>, <meta content="David Frenkiel" name="author"/>, <meta content="Luise Vindahl" name="author"/>, <meta content="Caroline Craig" name="author"/>]':'Caroline Craig, Luise Vindahl, Yotam Ottolenghi, Uyen Luu, David Frenkiel',  
                     'Yotam Ottlenghi':'Yotam Ottolenghi','Yuki Sugiura, Valerie Berry, Lee Gould, Rachel Vere and Music by Evan Gildersleeve, theguardian.com':'Yuki Sugiura, Valerie Berry, Lee Gould, Rachel Vere',
                     'Hugh-Fearnley Whittingstall':'Hugh Fearnley-Whittingstall','Hugh Fearnley-Whittinstall':'Hugh Fearnley-Whittingstall'})

# If NaN for authors and NaN for subtitle -> change it to none!   
values = {'author': 'none', 'subtitle':'none'}  
data = data.fillna(value=values)

In [11]:
# Check for the no comments and no url!
test = data[data['comments_number'].isnull()]  
test.share_number = 'No Sharing'  
data.append(test)

change = data[(data['date'] == '2015-01-20') & (data['author']== 'Ruby Tandoh' )]  
change.share_number = 52  
change.url = 'https://www.theguardian.com/lifeandstyle/2015/jan/20/best-baking-recipes-from-2014-ruby-tandoh'  
change.comments_number = 25  
data.append(change)

print("Change ok!")

Change ok!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [12]:
# Remove the rows if NaN is in url, headline_list, share_number and category after having make the above change!  
data = data.dropna(subset = ['headline_list','url','share_number','category'])  

print('Number of rows (recipes), data clean: ' + str(data.shape))
print("")
print('Data Clean')

Number of rows (recipes), data clean: (7390, 9)

Data Clean


In [None]:
# Check the types of the data!
data.dtypes

In [13]:
data.head(2)

Unnamed: 0,date,headline_list,category,author,subtitle,article_content,comments_number,share_number,url
0,2008-09-10T00:01:00+0100,The G2 weekly recipe: Beetroot pickled eggs,The G2 weekly recipe,Allegra McEvedy,none,"B\noth pickling and beetroot are enjoying a renaissance at the moment, while eggs never go out of fashion. When I was over in Virginia, US, the other week my friend James, knowing I liked trying some of the more weird and wonderful local delicacies gave me one of these pink eggs that a friend of his had made.\nHe made his with the liquid from a store-bought jar of pickled beets, which was good and interesting, but a subsequent version made with fresh juice made them even more arresting and b...",No Comments,4,https://www.theguardian.com/lifeandstyle/2008/sep/10/eggs.recipe
1,2008-09-10T11:43:43+0100,Just peachy,Allotment blog,Claire Ptak,A report and recipe from the Slow Food Nation event in San Francisco,"W\nhen I picked my husband up at the San Francisco International airport on Sunday, it was with great trepidation that I admitted an infidelity. 'I have something to tell you,' I said, 'I've been cheating on you — with California.' His knowing smile beamed as he adjusted his sunglasses and put down the window of our borrowed Volvo.\n\nI took him straight to the Mission district, to our old favourite, La Taqueria, for the self-proclaimed ""best burritos and tacos in the world."" Full on beans, ...",0,0,https://www.theguardian.com/global/allotment/2008/sep/10/recipe


Change the data date to date instead of object!

In [14]:
#Separe the date in year and month
# Remove the strings of time in date. Keep only the day and the month.b  
data.date = data.date.str.replace('T\d.*','')  
data['date'] = pd.to_datetime(data['date'])

# Add a column in dataframe for the year, month and day  
data['year'] = data['date'].dt.year  
data['month'] = data['date'].dt.month_name()  
data['day'] = data['date'].dt.day 

In [None]:
data_sub = data[0:2]
for j in data_sub['token_article_content']:
    print(j)
    print('')
    test = ' '.join([lemmatizer.lemmatize(w) for w in j])
    print(test)
    print('')
    data_sub['lem_test'] = test
    # data_sub['lem_test'] = data_sub['token_article_content'].apply(lambda x: ' '.join([lemmatizer.lemmatize(w) for w in j])
    
#data['lem_words'+str(i)] = ' '.join([lemmatizer.lemmatize(w) for w in data['token_'+str(i)]])
#data_sub['test_lem'] = ' '.join([lemmatizer.lemmatize(w) for w in data_sub['token_article_content']])
#data_sub['test_lem'] = data_sub['token_article_content'].apply(lambda x:' '.join(s for s in x))
#data_sub['test_lem_words'] = data_sub['test_lem'].apply(lambda x: lemmatizer.lemmatize(x))

In [None]:
input_str='been had done languages cities mice'
lemmatizer.lemmatize('eggs')
data_sub['lem_test'] = data_sub['token_article_content'].apply(lambda t: ' '.join([lemmatizer.lemmatize(w) for w in t]))lemmatizer = WordNetLemmatizer()

In [15]:
## Function for tokenization and cleaning text!   
cleaning = ['headline_list','subtitle','article_content']  

stop = stopwords.words('english') 
stop_words = ['tsp','tbsp','min','max','youll','neednt','isnt','ive','hese','dont','prep']
stop_remove = stop + stop_words

lemmatizer = WordNetLemmatizer()

def cleandata(data):
        for i in cleaning:
                print(str(i))
                # Remove the punctuation  
                data['clean_'+str(i)] = data[i].apply(lambda x:x.translate(str.maketrans('','', string.punctuation)))
                print('step 1: punctuation remove')

                
                # Get all the words in lowercase  
                data['clean_'+str(i)] = data['clean_'+str(i)].str.lower()  
                # Remove the '\\n' line_brakes  
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\n\d\d\d|\n\d\d|\n\d|\n\½|\.\n',' ')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\n',' ')
                # Remove the special character 'à,é,è'
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\ó','o')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\ú','u')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\é','e')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\è','e')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\á','a')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'\à','a')
                # Remove the special character and the number
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r"\’|\“|\”|\s\–|\'|\‘\…\ •",'')
                data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'[0-9]+|\¼|\½|\¼½', '')
                
                # Special case for 'since_you_are_here'
                if i == 'article_content':
                    data['clean_'+str(i)] = data['clean_'+str(i)].str.replace(r'since you’re here.*','')
                print('step 2: words in lowercase')

                # Remove all the english stopwords!  
                data['nostop_words_'+'clean_'+str(i)] = data['clean_'+str(i)].apply(  
                lambda x: ' '.join([word for word in x.split() if word not in (stop_remove)])) 
                # Remove all the words that are shorter than 2 characters! (ml, g,)
                data['nostop_words_'+'clean_'+str(i)] = data['nostop_words_'+'clean_'+str(i)].str.split().map(
                lambda x: " ".join(s for s in x if len(s) > 2))
                
                # Special case for 'first word of article' - remove it due to the \n break cleaning!
                if i == 'article_content':
                    data['nostop_words_'+'clean_'+str(i)] = data['nostop_words_'+'clean_'+str(i)].str.partition(' ')[2]
                print('step 3: nonstopwords and special characters removed')

                # Tokenize the text  
                data['token_'+str(i)] = data['nostop_words_'+'clean_'+str(i)].apply(word_tokenize)
                print('step 4: words tokenize')
                
                # Lemmatize Words - Plural and Singular are treated the same!
                data['lemmatize_'+str(i)] = data['token_'+str(i)].apply(lambda t: ' '.join([lemmatizer.lemmatize(w) for w in t]))
                #data['lem_words'+str(i)] = ' '.join([lemmatizer.lemmatize(w) for w in data['token_'+str(i)]])
                print('step 5: words lemmatize')
                print('Data Clean Finish')
                print("")
        return

cleandata(data)

headline_list
step 1: punctuation remove
step 2: words in lowercase
step 3: nonstopwords and special characters removed
step 4: words tokenize
step 5: words lemmatize
Data Clean Finish

subtitle
step 1: punctuation remove
step 2: words in lowercase
step 3: nonstopwords and special characters removed
step 4: words tokenize
step 5: words lemmatize
Data Clean Finish

article_content
step 1: punctuation remove
step 2: words in lowercase
step 3: nonstopwords and special characters removed
step 4: words tokenize
step 5: words lemmatize
Data Clean Finish



In [16]:
data.head()

Unnamed: 0,date,headline_list,category,author,subtitle,article_content,comments_number,share_number,url,year,...,token_headline_list,lem_wordsheadline_list,clean_subtitle,nostop_words_clean_subtitle,token_subtitle,lem_wordssubtitle,clean_article_content,nostop_words_clean_article_content,token_article_content,lem_wordsarticle_content
0,2008-09-10,The G2 weekly recipe: Beetroot pickled eggs,The G2 weekly recipe,Allegra McEvedy,none,"B\noth pickling and beetroot are enjoying a renaissance at the moment, while eggs never go out of fashion. When I was over in Virginia, US, the other week my friend James, knowing I liked trying some of the more weird and wonderful local delicacies gave me one of these pink eggs that a friend of his had made.\nHe made his with the liquid from a store-bought jar of pickled beets, which was good and interesting, but a subsequent version made with fresh juice made them even more arresting and b...",No Comments,4,https://www.theguardian.com/lifeandstyle/2008/sep/10/eggs.recipe,2008,...,"[weekly, recipe, beetroot, pickled, eggs]",weekly recipe beetroot pickled egg,none,none,[none],none,b oth pickling and beetroot are enjoying a renaissance at the moment while eggs never go out of fashion when i was over in virginia us the other week my friend james knowing i liked trying some of the more weird and wonderful local delicacies gave me one of these pink eggs that a friend of his had made he made his with the liquid from a storebought jar of pickled beets which was good and interesting but a subsequent version made with fresh juice made them even more arresting and beautiful bo...,pickling beetroot enjoying renaissance moment eggs never fashion virginia week friend james knowing liked trying weird wonderful local delicacies gave one pink eggs friend made made liquid storebought jar pickled beets good interesting subsequent version made fresh juice made even arresting beautiful colour flavour dozen freerange eggs vinegar pickling distilled beetroot juice amount obtained juicing fresh raw beetroot golden granulated sugar boil pan water gently lower eggs using slotted sp...,"[pickling, beetroot, enjoying, renaissance, moment, eggs, never, fashion, virginia, week, friend, james, knowing, liked, trying, weird, wonderful, local, delicacies, gave, one, pink, eggs, friend, made, made, liquid, storebought, jar, pickled, beets, good, interesting, subsequent, version, made, fresh, juice, made, even, arresting, beautiful, colour, flavour, dozen, freerange, eggs, vinegar, pickling, distilled, beetroot, juice, amount, obtained, juicing, fresh, raw, beetroot, golden, granul...",pickling beetroot enjoying renaissance moment egg never fashion virginia week friend james knowing liked trying weird wonderful local delicacy gave one pink egg friend made made liquid storebought jar pickled beet good interesting subsequent version made fresh juice made even arresting beautiful colour flavour dozen freerange egg vinegar pickling distilled beetroot juice amount obtained juicing fresh raw beetroot golden granulated sugar boil pan water gently lower egg using slotted spoon sim...
1,2008-09-10,Just peachy,Allotment blog,Claire Ptak,A report and recipe from the Slow Food Nation event in San Francisco,"W\nhen I picked my husband up at the San Francisco International airport on Sunday, it was with great trepidation that I admitted an infidelity. 'I have something to tell you,' I said, 'I've been cheating on you — with California.' His knowing smile beamed as he adjusted his sunglasses and put down the window of our borrowed Volvo.\n\nI took him straight to the Mission district, to our old favourite, La Taqueria, for the self-proclaimed ""best burritos and tacos in the world."" Full on beans, ...",0,0,https://www.theguardian.com/global/allotment/2008/sep/10/recipe,2008,...,[peachy],peachy,a report and recipe from the slow food nation event in san francisco,report recipe slow food nation event san francisco,"[report, recipe, slow, food, nation, event, san, francisco]",report recipe slow food nation event san francisco,w hen i picked my husband up at the san francisco international airport on sunday it was with great trepidation that i admitted an infidelity i have something to tell you i said ive been cheating on you — with california his knowing smile beamed as he adjusted his sunglasses and put down the window of our borrowed volvo i took him straight to the mission district to our old favourite la taqueria for the selfproclaimed best burritos and tacos in the world full on beans grilled steak perfectl...,picked husband san francisco international airport sunday great trepidation admitted infidelity something tell said cheating california knowing smile beamed adjusted sunglasses put window borrowed volvo took straight mission district old favourite taqueria selfproclaimed best burritos tacos world full beans grilled steak perfectly ripe mashed avocados burning hot salsa hopped back car head country week needed stock supplies reunion supper dropped market loaded heirloom tomatoes red purple ye...,"[picked, husband, san, francisco, international, airport, sunday, great, trepidation, admitted, infidelity, something, tell, said, cheating, california, knowing, smile, beamed, adjusted, sunglasses, put, window, borrowed, volvo, took, straight, mission, district, old, favourite, taqueria, selfproclaimed, best, burritos, tacos, world, full, beans, grilled, steak, perfectly, ripe, mashed, avocados, burning, hot, salsa, hopped, back, car, head, country, week, needed, stock, supplies, reunion, s...",picked husband san francisco international airport sunday great trepidation admitted infidelity something tell said cheating california knowing smile beamed adjusted sunglass put window borrowed volvo took straight mission district old favourite taqueria selfproclaimed best burrito taco world full bean grilled steak perfectly ripe mashed avocado burning hot salsa hopped back car head country week needed stock supply reunion supper dropped market loaded heirloom tomato red purple yellow green...
2,2008-09-13,The new vegetarian,The new vegetarian,Yotam Ottolenghi,Goat's cheese soufflés with vanilla poached peach,"A tribute to the 90s, when double-baked soufflés appeared on menus across the land. Serves six.\n150ml each water and white wine\n150g caster sugar\n½ tsp black peppercorns\n½ vanilla pod, split and seeds scraped\n3 medium peaches, peeled\n60g ground hazelnuts\n280ml milk\n1 bay leaf\n½ onion, studded with a few cloves\n60g unsalted butter, at room temperature, plus more for brushing\n40g plain flour\n180g hard goat's cheese, broken up\n4 medium eggs, separated, plus one extra white\n½ tsp s...",No Comments,0,https://www.theguardian.com/lifeandstyle/2008/sep/13/vegetarian.cheese,2008,...,"[new, vegetarian]",new vegetarian,goats cheese souffles with vanilla poached peach,goats cheese souffles vanilla poached peach,"[goats, cheese, souffles, vanilla, poached, peach]",goat cheese souffle vanilla poached peach,a tribute to the s when doublebaked souffles appeared on menus across the land serves six ml each water and white wine g caster sugar tsp black peppercorns vanilla pod split and seeds scraped medium peaches peeled g ground hazelnuts ml milk bay leaf onion studded with a few cloves g unsalted butter at room temperature plus more for brushing g plain flour g hard goats cheese broken up medium eggs separated plus one extra white tsp salt put the water wine sugar peppercorns and vanilla i...,doublebaked souffles appeared menus across land serves six water white wine caster sugar black peppercorns vanilla pod split seeds scraped medium peaches peeled ground hazelnuts milk bay leaf onion studded cloves unsalted butter room temperature plus brushing plain flour hard goats cheese broken medium eggs separated plus one extra white salt put water wine sugar peppercorns vanilla pan bring simmer add peaches cover simmer minutes soft much start disintegrate set aside cool preheat oven gas...,"[doublebaked, souffles, appeared, menus, across, land, serves, six, water, white, wine, caster, sugar, black, peppercorns, vanilla, pod, split, seeds, scraped, medium, peaches, peeled, ground, hazelnuts, milk, bay, leaf, onion, studded, cloves, unsalted, butter, room, temperature, plus, brushing, plain, flour, hard, goats, cheese, broken, medium, eggs, separated, plus, one, extra, white, salt, put, water, wine, sugar, peppercorns, vanilla, pan, bring, simmer, add, peaches, cover, simmer, min...",doublebaked souffle appeared menu across land serf six water white wine caster sugar black peppercorn vanilla pod split seed scraped medium peach peeled ground hazelnut milk bay leaf onion studded clove unsalted butter room temperature plus brushing plain flour hard goat cheese broken medium egg separated plus one extra white salt put water wine sugar peppercorn vanilla pan bring simmer add peach cover simmer minute soft much start disintegrate set aside cool preheat oven gas mark pour enoug...
3,2008-09-13,How to bake,How to bake,Dan Lepard,none,"Cheese and black pepper buttons\nOdd little bits of cheese, especially those ones that have gone slightly crusty at the end, can be used up in combination for making these biscuits. They're especially good eaten as an early evening snack to accompany a glass or two of dry fino sherry. Look on the Parmesan and other cheeses mentioned below as mere suggestions, and check the deli counter at the supermarket for odd bits of remaindered cheese they sell cheaply. You know what they say: every litt...",No Comments,4,https://www.theguardian.com/lifeandstyle/2008/sep/13/cheese.baking,2008,...,[bake],bake,none,none,[none],none,cheese and black pepper buttons odd little bits of cheese especially those ones that have gone slightly crusty at the end can be used up in combination for making these biscuits theyre especially good eaten as an early evening snack to accompany a glass or two of dry fino sherry look on the parmesan and other cheeses mentioned below as mere suggestions and check the deli counter at the supermarket for odd bits of remaindered cheese they sell cheaply you know what they say every little helps ...,black pepper buttons odd little bits cheese especially ones gone slightly crusty end used combination making biscuits theyre especially good eaten early evening snack accompany glass two dry fino sherry look parmesan cheeses mentioned mere suggestions check deli counter supermarket odd bits remaindered cheese sell cheaply know say every little helps unsalted butter softened coarsely ground black pepper small clove garlic peeled grated paste salt parmesan finely grated hard cheese cheddar dou...,"[black, pepper, buttons, odd, little, bits, cheese, especially, ones, gone, slightly, crusty, end, used, combination, making, biscuits, theyre, especially, good, eaten, early, evening, snack, accompany, glass, two, dry, fino, sherry, look, parmesan, cheeses, mentioned, mere, suggestions, check, deli, counter, supermarket, odd, bits, remaindered, cheese, sell, cheaply, know, say, every, little, helps, unsalted, butter, softened, coarsely, ground, black, pepper, small, clove, garlic, peeled, g...",black pepper button odd little bit cheese especially one gone slightly crusty end used combination making biscuit theyre especially good eaten early evening snack accompany glass two dry fino sherry look parmesan cheese mentioned mere suggestion check deli counter supermarket odd bit remaindered cheese sell cheaply know say every little help unsalted butter softened coarsely ground black pepper small clove garlic peeled grated paste salt parmesan finely grated hard cheese cheddar double glou...
4,2008-09-14,Perfect pasta,Nigel Slater recipes,Nigel Slater,Nigel Slater cooks pasta with aubergine and pine kernels followed by plum and cinnamon pie,"T\nhe pasta I like best is one capable of holding a sauce - those that come with nooks, crannies, pockets and hollows in which a dribble of cream or a morsel of sauce can safely hide till it gets in your mouth. It's the shells, tubes and spirals that get my vote over the ribbons and strings every time. Of course this could be pure greed: there is something much more satisfying about biting into a bit of pasta that oozes cheesy, herby sauce into your mouth than one that feels like a piece of ...",No Comments,3,https://www.theguardian.com/lifeandstyle/2008/sep/14/fruit.vegetarian,2008,...,"[perfect, pasta]",perfect pasta,nigel slater cooks pasta with aubergine and pine kernels followed by plum and cinnamon pie,nigel slater cooks pasta aubergine pine kernels followed plum cinnamon pie,"[nigel, slater, cooks, pasta, aubergine, pine, kernels, followed, plum, cinnamon, pie]",nigel slater cook pasta aubergine pine kernel followed plum cinnamon pie,t he pasta i like best is one capable of holding a sauce those that come with nooks crannies pockets and hollows in which a dribble of cream or a morsel of sauce can safely hide till it gets in your mouth its the shells tubes and spirals that get my vote over the ribbons and strings every time of course this could be pure greed there is something much more satisfying about biting into a bit of pasta that oozes cheesy herby sauce into your mouth than one that feels like a piece of oily strin...,like best one capable holding sauce come nooks crannies pockets hollows dribble cream morsel sauce safely hide till gets mouth shells tubes spirals get vote ribbons strings every time course could pure greed something much satisfying biting bit pasta oozes cheesy herby sauce mouth one feels like piece oily string interesting pasta shapes shellshaped conchiglie earlike orecchiette didnt happen know someone put great deal thought stuff treat ultimate nobrain supper took nothing short genius co...,"[like, best, one, capable, holding, sauce, come, nooks, crannies, pockets, hollows, dribble, cream, morsel, sauce, safely, hide, till, gets, mouth, shells, tubes, spirals, get, vote, ribbons, strings, every, time, course, could, pure, greed, something, much, satisfying, biting, bit, pasta, oozes, cheesy, herby, sauce, mouth, one, feels, like, piece, oily, string, interesting, pasta, shapes, shellshaped, conchiglie, earlike, orecchiette, didnt, happen, know, someone, put, great, deal, thought...",like best one capable holding sauce come nook cranny pocket hollow dribble cream morsel sauce safely hide till get mouth shell tube spiral get vote ribbon string every time course could pure greed something much satisfying biting bit pasta ooze cheesy herby sauce mouth one feel like piece oily string interesting pasta shape shellshaped conchiglie earlike orecchiette didnt happen know someone put great deal thought stuff treat ultimate nobrain supper took nothing short genius come classic sha...


In [18]:
data_clean = data[['date', 'headline_list', 'category', 'comments_number', 'share_number', 'url', 'year',
       'month', 'day', 'clean_headline_list', 'nostop_words_clean_headline_list', 'token_headline_list',
       'clean_subtitle', 'nostop_words_clean_subtitle', 'token_subtitle','clean_article_content', 
       'nostop_words_clean_article_content', 'token_article_content','lem_wordsheadline_list','lem_wordsheadline_list','lem_wordsarticle_content']]

In [19]:
# Save the Clean Text in a CSV file
data_clean.to_csv('guardian_clean_data_final.csv')
print('data clean and save in csv file')

data clean and save in csv file
