In [43]:
import csv
import os
import re
from chardet.universaldetector import UniversalDetector
import codecs

# Set directory
os.chdir('C:\\Users\\chapman4\\Desktop\\zuck-content\\')

# Filename constants
FILE_FULL_TEXT = 'zuck_full_text.csv'

# Detector object for encoding detection
detector = UniversalDetector()

def detect_encoding(filename):
    with open(filename + '.txt', 'rb') as infile:
        detector.reset()
        for line in infile:
            detector.feed(line)
            if detector.done:   # detection process ends automatically when confidence is high enough
                break
        detector.close()
        return detector.result['encoding']
    
# Creating list of filenames
def create_filenames():
    filenames = [name.split(".")[0] for name in os.listdir(".") if name.endswith(".txt")]
    return filenames

# Read text files, save content to dictionary (id : content)
def grab_text(filenames):
    rows = []
    for filename in filenames:
        encoding = detect_encoding(filename)
        with codecs.open(filename + '.txt', 'r', encoding) as infile:
            row = {}
            content = ''
            for line in infile:
                line_clean1 = line.strip()
                line_clean2 = re.sub('[^A-Za-z0-9]+', ' ', line_clean1)
                line_clean3 = re.sub('Mark Zuckerberg', '', line_clean2)
                content += line_clean3
            row['filename'] = filename
            row['content'] = content
            rows.append(row)
            print(row)
    return rows
        
# Write to csv file
def write_csv(filename, rows):
    with open(filename, 'w', encoding='utf-8') as csv_file:
        col_names = rows[0].keys()
        writer = csv.DictWriter(csv_file, col_names)
        writer.writeheader()
        writer.writerows(rows) 
        
def main():
    filenames = create_filenames()
    rows = grab_text(filenames)
    write_csv(FILE_FULL_TEXT, rows)
    
main()

{'filename': '2004-003', 'content': ' Male CNBC News Service Show called Bullseye hosted by our Becky Quick In 2004 longbefore Facebook was a household name  was one of Becky s guest   When we first launched we were hoping for you know maybe 400 500 people Harvarddidn t have a Facebook So that s the gap that we were trying to fill and now we re at 100 000people So who knows where we re going next Um we re hoping to have many moreuniversities by fall Hopefully over 100 or 200 and from there we re gonna launch a bunch ofset applications which should keep people coming back to the site and maybe to makesomething cool  Becky What is The Facebook exactly   It s an online directory that connects people through universities and colleges thoughtheir social networks there You sign on You make a profile about yourself by answering somequestions entering some information such as your concentration or major at school um contactinformation about phone numbers instant messaging screen names anything

{'filename': '2008-002', 'content': ' Sarah Lacy Everyone so I m Sarah Lacy That is not a camera and if you don t know who this guy is you should really leaving give your seat to someone in the overflow room This is of course  from Facebook   Thank you  Sarah Lacy So uh so uh there I m in a lucky position I m the lucky reporter who actually gets to ask him every obnoxious question So you know I actually feel like most of the stuff that s been written about Facebook since the news since the platform launch has been all corporate stuff it s been year 23 it s been the 15 billion valuation The core of why you guys are doing so well is the site itself and I think a lot of people have a lot of misunderstandings about the site itself and it s something we ve talked about in the past So to start up you know I just want you to talk a little bit about the role Facebook is playing in the world now and how that s evolved since it was just your Harvard project   Sure actually that s a great first p

{'filename': '2008-004', 'content': ' Kara Swisher  0 01  and Sheryl Sandberg Claps once Music starts Audience applauds Zuckerberg and Sandberg enters from stage left Both are carrying a water bottle Sandberg is also carrying a big gift bag  Kara Swisher  0 09 Hey Shaking Zuckerberg s hand Hi Going to and Shaking Sandberg s hand Hey there How s it going Have a seat All three 3 go to their seats Swisher sits down  Kara Swisher  0 15 Ah  Sheryl Sandberg  0 17 So we uh pause we brought you a gift Hands Swisher a purple princess gift bag  Kara Swisher  0 18 A gift  Sheryl Sandberg  0 19 Nods We did We brought you a gift  Kara Swisher  0 19 And I m such a princess Yeah I mean I mean obviously Sandberg hands the bag to Swisher Swisher takes the bag Oh no I m a little frightened  Sheryl Sandberg  0 22 Inhales We learned Swisher opening bag this morning from Barry Diller sitting down that we were the Swisher reaching into bag princess phone of Zuckerberg sitting down after Sandberg our generat

{'filename': '2008-005', 'content': ' Speaker 1  Music Applause Uh now uh things have changed dramatically I d like to bring up  who now wears tennis shoes Laughing Applause Mark Inaudible 00 00 20 You can sit you don t have to you don t have to like you can come closer there I won t bite   All right  Speaker 1 Um So Mark You remember the first question I asked you last year   Oh yeah  Speaker 1 I asked you how s the financing going   Yeah and I said We re going to close it next week  Speaker 1 So let me ask you how s the financing going Laughs   Um Well you know I wish I could say the same thing as I said last year So last year we were basically right in the middle of of discussing ad partnerships with a bunch of companies and um as you know we chose we we went with Microsoft and did this advertising partnership which has become a much broader partnership where we now work with them on other technology as well including search which we launched most recently And we took an investment 

{'filename': '2009-012', 'content': '  Hey Thanks for all coming out today It s an honor to be here and to have that chance to talk to you guys So I m only going to talk for maybe 10 or 15 minutes and then I want to be able to spend the rest of the time answering your questions So I have a brief history of what we ve done with Facebook and what we re trying to do that I ll get started with and then just take a bunch of times to take your questions Figuring out how the click All right so for you guys who are aren t familiar with what Facebook is trying to do our mission is to give people the power to share and to make the world more open and connected So what we mean by this is that more open world is there s more information available people can have access to more information you can see what s going on with the people around you and more connected means that people can stay connected better with their friends and family people immediately around them but also people all across the wo

{'filename': '2010-008', 'content': ' Article You might expect that Facebook founder and CEO  would dismiss the four NYU college students who want to take on Facebook s dominance of social networking by building a distributed open alternative that includes a way for people to run their own servers But instead Zuckerberg said he donated to the Diaspora project adding to the 190 000 it has raised in part because he appreciates their drive to change the world Note This reporter followed up with Facebook s press office Thursday to ask how much Zuckerberg donated to Diaspora but the press office said they d rather not answer In an interview with Wired com on Wednesday after announcing simpler privacy controls for Facebook Zuckerberg also talked about where he sees the site going his drive to make the world more open why the face in Facebook is so important and why he wouldn t start a social network if he were launching a site today  Wired com What do you think of the push for an open federa

{'filename': '2010-010', 'content': ' Kara I just want to say a lot of things 00 00 05 talking about Mark that I think he has a lot of guts to come up here and talk about it and crosstalk 00 00 10  Walt I just was telling him that  Kara Yeah  Walt He s he s he came here He s eager to talk about uh where Facebook is going Uh both the controversies and issues they ve been going through on  Kara And where crosstalk 00 00 22  Walt privacy and also where social networking is going So without further ado  Kara So without further ado   Music  Walt Thanks man  Kara So so what would you like to begin  Walt Um well why don t we do it this way we do want to get to the talking about the future but we have some there is some controversy you ve just been through So let me just start by asking you um you have a business that s based on sharing Obviously people go on Facebook unless they wanted to share with groups of people but there is this perception that uh you re on a course or have been on a cou

{'filename': '2010-013', 'content': ' John Good evening everyone and welcome to the museum My names John Holler I m the CEO And on behalf of the trustees our staff our members and everyone involved with the museum it s a pleasure to welcome you here tonight to this event It is the latest installment in our series of programs on the 40th anniversary of the events that led to today s fully wired world I want to begin by thanking Intel for serving as the lead sponsor of our Net at 40 series Intel is a very generous supporter and a good friend to the museum We very much appreciate they re underwriting of these programs Along the way we ve also had support from Semantic Corporation and of course our donors and members who in the last 12 months have provided more than two million dollars in support for our work Kepler s is the museum partner for books related to our programs as they are tonight They ll be selling the Facebook Effect after our program I want to thank Clark Kepler for their on

{'filename': '2010-022', 'content': ' Blodget Welcome to the Business Insider s series on Innovation I am Henry Blodget Our guest today is  He is the founder and CEO of the modestly successful Facebook Mark founded the Facebook when he was nineteen as a sophomore at Harvard He ended up quitting Harvard moving to California building the company and they now have more than 250 million users worldwide So obviously a lot to talk about on the Innovation front Let s jump right into it So Mark when a company like Facebook gets to this size gets to this level of maturity A lot of people will start looking at it and assume that this was the direction you set out to go the whole time Take us back to the dorm room at Harvard Talk about the initial idea and what the initial idea looked like relative to where you are today   Well so my sophomore year at Harvard when I first built this there were a lot of really interesting stuff going on I mean there were two primary things that were happening kind

{'filename': '2010-024', 'content': ' Female Cinch  Robert So we re here in the office with  So Mark what s up Laughs um the announcements you made this morning were pretty uh interesting for developers Um where do you think this is going in terms of how is our inaudible 00 00 23 changing because of social location That s really I think the key thing that I ve seen going on   I don t know I don t think it has changed much yet Um but uh I mean our view on all of the stuff is that if you look five years out like every vertical or industry is going to be rethought in a social way right So I I just just moved laughing other people out of the way That s cool  Robert  Crosstalk 00 00 53 we re all trying to record it   It was amazing how like liquid that was Um  Male He s done it before   So I mean I think that that s the big thing right The and before today I think a lot of people just thought about Facebook on mobile devices as the app right Like you you use the Facebook app or you use the 

{'filename': '2013-036', 'content': ' David B  Applause Good afternoon My name is David Bradley I m the owner of theAtlantic Um I am so appreciative of your being here I had not had any idea that so many couldbe with us especially with such a last minute invitation Uh it just feels terrible to begin atestimonial um but I should probably address this to you Mark Before I joined Facebook I never had this group of friends before Laughter And then Ijoined Facebook and now everywhere I go they come with me laughter So we re goingshopping we go to the movies together we go to TV Um it s nice to have friends Dear friends laughter I want to tell you about Walt Disney born in 1901 Uh he lefthome at age 18 and moved to Kansas City where he moved in with his brother He was good atsketching cartoons and so he um went to the local to the local movie theater which was the onlytheater in town and said Can I draw posters for you and put them around town uh as a way ofpromoting the film And he was hire

{'filename': '2014-025', 'content': ' Speaker 1 A warm welcome to all of you and also uhh our our Founder and CEO MarkZuckerberg Uhh we ve got about 20 to 30 minutes Uhh thank you for sending yourquestions well in advance In the interest of time umm I d like to hand the mike out toAnup Jairam from Business world  Anup Hi Mark you can inaudible 00 00 19 at Barcelona Inaudible 00 00 22 inBarcelona earlier this year I believe a meeting inaudible 00 00 28 today Are youusing more action with Indian telcos and are you What re you doing consideringthat India is uhh a market where you re going online through a mobile do you seetie ups with indian telecos happening  Speaker 1 So you met Sunil Mittal the chairman of Bharti at the Mobile World Congress inBarcelona Uhh Anup s question is considering that India s going online and mobileis a big boom uhh could we expect certain deals with uhh Airtel   I see so between Facebook and those companies Well I mean a lot of what we retrying to do and then 

{'filename': '2016-006', 'content': ' Article Facebook founder and CEO  sat down with Axel Springer CEO Mathias D pfner for the German newspaper Die Welt am Sonntag last week in Berlin Zuckerberg was given the first ever Axel Springer Award for being an exceptional innovator and entrepreneur This lightly edited Q A is an excerpt from an interview given to Die Welt Welt am Sonntag  D pfner Mark on Facebook we learned that you were jogging at the Brandenburg Gate this morning How was it   It was good It was the first time I have gone running in snow in 20 years Whenever I go to a new city in order to help get on the right time zone and actually get a chance to see that city I like running so today was awesome  D pfner It s not your first time in Berlin   No and I love it Berlin is one of my favorite cities in the world I feel like the energy is very youthful It has such an important history including its recent history of unification In a lot of ways Berlin is a symbol for me of Facebook

{'filename': '2016-016', 'content': '  Hey everyone uh we re live from Facebook headquarters uh for the first ever uh live Q A that we ve done as a community together Uh so you know I ve been going around the world and um and doing some town hall Q As uh to learn from people around the community uh what you wanna hear from Facebook uh wh what you what you wanna to see on Facebook and um what we can be doing better to serve you And you know we we ve done about 10 of these 00 00 30 um as I go around the world to different cities And you know recently we ve launched this new product Live uh which allows us to hear from from people um all over the world live And you know I I have been The these town hall Q As have been modeled off of um Q internal Q As that I ve been doing at Facebook for for for many years now And a few weeks ago I I started off trying to do um an internal live Q A 00 01 00 and I found that it was just so much more fun and engaging and I could see people s comments as I w

{'filename': '2016-018', 'content': ' Barack Obama Thank you Applause Hello everybody Thank you so much Thank you Applause Thank you Thank you very much Thank you Everybody have a seat Thank you so much Thank you Thank you so much Well this is a good looking group Thank you Applause Well first of all let me thank President Hennessy for the introduction and the entire Stanford family for letting us take over the campus for a few days As some of you know John is stepping down after 16 years as president of Stanford Fortunately for me I cannot do that to just stick around longer than my term limit John I m sure there are some people who want you to stick around longer but I m confident that you re going to do extraordinary things And we could not be prouder of John Hennessey and Stanford and all the great work that they have done So please give him a big round of applause Applause Now it s summer break Just so you all of you know Stanford is not always this quiet This school is unique Fol

{'filename': '2016-023', 'content': '   Applause Hi Hi thank you It s it s so great it s it s so great to be here with you guys at University of Luiss Thank you for hosting me this this afternoon Applause Now before we get started I want to say a few words about the earthquake You know our our hearts go out to all of the people who have lost a home or have lost a loved one or know someone who is affected by the earthquake It s really hard to imagine I think what it s like to just in an instant have your your whole life turned upside down like that And you know in moments like this it s it s heartbreaking but also seeing how the community here in Italy and around the world I have been able to come together to rally for the people who ve been affected by the disaster and those who love them um seeing people come together and connect is I always find really inspiring And the spirit of the people here in Italy that we are going to get through this and we are going to rebuild and it s going

{'filename': '2017-008', 'content': '  Harvard Commencement 2017President Faust Board of Overseers faculty alumni friends proud parents members of the ad board and graduates of the greatest university in the world I m honored to be with you today because let s face it you accomplished something I never could If I get through this speech it ll be the first time I actually finish something at Harvard Class of 2017 congratulations I m an unlikely speaker not just because I dropped out but because we re technically in the same generation We walked this yard less than a decade apart studied the same ideas and slept through the same Ec10 lectures We may have taken different paths to get here especially if you came all the way from the Quad but today I want to share what I ve learned about our generation and the world we re building together But first the last couple of days have brought back a lot of good memories How many of you remember exactly what you were doing when you got that email t

{'filename': '2017-159', 'content': '  Live with Dreamers at my home '}
{'filename': '2017-160', 'content': '  I m sitting with some Dreamers at my home talking about DACA and immigration We ll go live and take questions at noon '}
{'filename': '2017-161', 'content': '  Priscilla and I are proud to announce that the Chan Zuckerberg Initiative is making a 75 millioninvestment to help launch Resolve to Save Lives a new global health initiative to save 100million lives by preventing epidemics and cardiovascular disease If we re going to cure prevent and manage all diseases in our children s lifetime we need to beready for the next infectious disease outbreaks like we ve seen with Ebola and Zika in recentyears Many countries don t have the infrastructure to detect threats early and prevent them fromspreading and that puts millions of lives at risk all around the world Resolve will work withgovernments to build these systems to identify and contain outbreaks sooner At the same time more peo

{'filename': '2017-200', 'content': '  My personal challenge this year was to have visited every US state by the end of 2017 to listen and learn how people are thinking about their lives their work and their future Last Friday on my last stop of the year I had a conversation with Neeli Bendapudi Provost and Executive Vice Chancellor University of Kansas to discuss what I ve learned Below is a transcript of our conversation edited for clarity and length   Good morning It s an honor to be here with you to conclude this year of travel And I m really excited to hear your questions and to talk about what I ve seen and learned this year  Neeli We are so grateful you chose the University of Kansas as your final stop on this year of travel And I also want to make sure we welcome everyone who s joining us live from all over the world Let s start off I m sure people are wondering how did you end up in Kansas You ve traveled to 30 states this year Most of us do New Year s Resolutions You do a cha