In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [2]:
!pip install textblob
from textblob import TextBlob



In [3]:
# import Beautiful Soup, NumPy and Pandas, etc
import bs4 as bs
import numpy as np
import pandas as pd
import re
import hashlib
 
# download NLTK classifiers - these are cached locally on your machine
import nltk
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')

# import ml classifiers
from nltk.tokenize import sent_tokenize # tokenizes sentences
from nltk.stem import PorterStemmer     # parsing/stemmer
from nltk.tag import pos_tag            # parts-of-speech tagging
from nltk.corpus import wordnet         # sentiment scores
from nltk.stem import WordNetLemmatizer # stem and context
from nltk.corpus import stopwords       # stopwords
from nltk.util import ngrams            # ngram iterator

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /opt/conda/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /opt/conda/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /opt/conda/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /opt/conda/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
ps = PorterStemmer()
wnl = WordNetLemmatizer()
eng_stopwords = set(stopwords.words('english'))

## NLP Code to clean texts, and lemmatize prior to finding polarity scores
## taken from HW3 Solution Video
def reviewcleaner(review, lemmatize = True, stem = False):
    if lemmatize == True and stem == True:
        raise RuntimeError("May not pass both as true")
    #Remove HTML Tags
    review = bs.BeautifulSoup(review).text
    
    #use regex to find emoticons
    emoticons = re.findall(' (?::|;|=)(?:-)?(?:\)|\(|D|P)',review)
    
    #Remove punctuation
    review = re.sub9("[^a-zA-Z]", ' ', review)
    
    #Tokenize into words (all lower case)
    review = review.lower().split()
    
    #remove stopwords, lemmatize, stem
    clean_review = []
    for word in review: 
        if word not in eng_stopwords:
            if lemmatize is True:
                word = wnl.lemmatize(word)
            elif stem is True:
                word = ps.stem(word)
            clean_review.append(word)
            
    #join the review to one sentence
    review_processed = ' '.join(clean_review + emoticons)

In [5]:
#Function to Handle Missing Data in social media posts
def fillnaf (tbl):
    tbl1 = tbl["Body"].fillna(tbl["Title"])
    tbl["Body"] = tbl1
    return tbl

## Calculate Polarities per University

In [6]:
#Load in Berkeley Data and Rename columns
berk = pd.read_csv('berkeleydata.csv')
berk.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
berk['University'] = 'UC Berkeley'
#berk['Body'] = reviewcleaner(berk['Body'], True, False)
berk.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,How's everyone feeling about the 70 midterm?,1.0,3/1/2020 0:43,3.0,The previous midterms seem pretty hard. Idk ho...,-0.069444,0.569444,UC Berkeley
1,1,Livestream Results HERE!,1.0,3/1/2020 0:44,1.0,Livestream Results HERE!,0.0,0.0,UC Berkeley
2,2,Any NASCAR fans here?,1.0,3/1/2020 1:31,0.0,[removed],0.0,0.0,UC Berkeley
3,3,What are the easiest minors in terms of class ...,1.0,3/1/2020 1:53,2.0,What are the easiest minors in terms of class ...,0.0,0.0,UC Berkeley
4,4,where are people getting the piazza stickers from,1.0,3/1/2020 1:55,10.0,this is very random but I feel like I've recen...,-0.019643,0.342857,UC Berkeley


In [7]:
#Load in UCLA Data and Rename columns
ucla = pd.read_csv('ucladata.csv')
ucla.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
ucla['University'] = 'UCLA'
#ucla['Body'] = reviewcleaner(ucla['Body'], True, False)
ucla.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,Looking for 2 Females ready to sign a lease fo...,1,2020-03-01 09:15:20,0,This would be for a September move-in! Please ...,0.5,0.5,UCLA
1,1,Hey Siri play UCLA by RL Grime 🔥,1,2020-03-01 09:34:42,1,Hey Siri play UCLA by RL Grime 🔥,0.0,0.0,UCLA
2,2,How do I even study for Psych100B,1,2020-03-01 10:12:35,1,I got a near perfect score the first time I ra...,0.115625,0.338542,UCLA
3,3,Serious Question: Anyone else really into Dark...,1,2020-03-01 10:17:30,2,I’ve yet to meet a single person who feels hel...,-0.285714,0.607143,UCLA
4,4,Buying CHEM 153C past midterm 2,1,2020-03-01 10:29:27,0,Please 😢 I’m struggling \nWilling to pay,0.25,0.75,UCLA


In [8]:
#Load in UCSD Data and Rename columns
ucsd = pd.read_csv('ucsddata.csv')
ucsd.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
ucsd['University'] = 'UC San Diego'
#ucsd['Body'] = reviewcleaner(ucsd['Body'], True, False)
ucsd.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,Not UC Socially Dead...,1,2020-03-01 00:12:07,15,UC Super Depression.\n\nAm I right?,0.309524,0.60119,UC San Diego
1,1,Academic Probation/Subject to Disqualification...,1,2020-03-01 02:54:10,0,[removed],0.0,0.0,UC San Diego
2,2,Academic probation help!,1,2020-03-01 02:57:19,5,[removed],0.0,0.0,UC San Diego
3,3,working at the zone?,1,2020-03-01 05:44:33,1,how busy is it? what are the hours like? was i...,0.1,0.481481,UC San Diego
4,4,Python/Coding extracurricular?,1,2020-03-01 09:53:48,2,"I know this is pretty late in the year, but do...",0.093056,0.719444,UC San Diego


In [9]:
#Load in UC Irvine Data and Rename columns
uci = pd.read_csv('ucidata.csv')
uci.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
uci['University'] = 'UC Irvine'
#uci['Body'] = reviewcleaner(uci['Body'], True, False)
uci.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,ASUCI,1,2020-03-01 10:12:25,0,so did the ASUCI president really get impeache...,0.119444,0.425,UC Irvine
1,1,I always feel like an afterthought,1,2020-03-01 10:45:45,17,I guess this is more of a rant and r/offmyches...,-0.114409,0.57344,UC Irvine
2,2,Coronavirus international student,1,2020-03-01 10:53:20,4,"Hey guys, does anyone know what would happen t...",0.0,0.05,UC Irvine
3,3,COMPSCI 164 Goodrich,1,2020-03-01 13:50:50,0,How hard would this class be if I haven't take...,-0.291667,0.541667,UC Irvine
4,4,Pre-Req Clearance Problem?,1,2020-03-01 17:22:23,2,When I tried to enroll in CS 161 and 162 befor...,0.142857,0.394048,UC Irvine


In [10]:
#Load in UC Davis Data and Rename columns
davis = pd.read_csv('ucddata.csv')
davis.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
davis['University'] = 'UC Davis'
#davis['Body'] = reviewcleaner(davis['Body'], True, False)
davis.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,discord group for students who want to talk ab...,1,2020-03-01 12:07:50,1,[https://discord.gg/DUmdMy](https://discord.gg...,0.0,0.0,UC Davis
1,1,I need money,1,2020-03-01 17:30:42,19,What do?,0.0,0.0,UC Davis
2,2,Transfer wanting to change from Biochemistry t...,1,2020-03-01 18:10:37,9,Would I be able to change majors right when I ...,0.117076,0.517163,UC Davis
3,3,"Hi, Need help writing a paper?",1,2020-03-01 20:11:22,0,"Hi, Need help writing a paper? I am here to h...",0.2375,0.6375,UC Davis
4,4,Im digging a hole on campus and filling it wit...,1,2020-03-01 20:22:48,33,Im digging a hole on campus and filling it wit...,0.53772,0.85,UC Davis


In [11]:
#Load in UC Riverside Data and Rename columns
riverside = pd.read_csv('ucr.csv')
riverside.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
riverside['University'] = 'UC Riverside'
#riverside['Body'] = reviewcleaner(riverside['Body'], True, False)
riverside.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,Help installing liquid cpu cooler and front fan,1,2020-03-01 09:16:17,5,I'd rather give money to a fellow student than...,-0.05,0.541667,UC Riverside
1,1,I hate to do this but...,1,2020-03-01 09:41:22,2,I have a project for one of my COM classes &am...,-0.257738,0.519048,UC Riverside
2,2,March for Bernie! Tomorrow at the Bell Tower 1...,1,2020-03-01 09:58:00,12,March for Bernie! Tomorrow at the Bell Tower 1...,0.0,0.0,UC Riverside
3,3,"does anyone have the book, the twilight of Ame...",1,2020-03-01 09:59:39,4,i just need to read chapter 4. So if someone h...,0.0,0.0,UC Riverside
4,4,2098t,1,2020-03-01 11:47:52,2,How do I find this file?,0.0,0.0,UC Riverside


In [12]:
#Load in UC Santa Cruz Data and Rename columns
ucsc = pd.read_csv('ucscdata.csv')
ucsc.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
ucsc['University'] = 'UC Santa Cruz'
#ucsc['Body'] = reviewcleaner(ucsc['Body'], True, False)
ucsc.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,UCSC be like,1,2020-03-01 00:14:47,3,UCSC be like,0.0,0.0,UC Santa Cruz
1,1,Does guaranteed housing status matter if we ca...,1,2020-03-01 01:34:35,5,I'm in a group of 6 current freshman who want ...,0.0,0.15,UC Santa Cruz
2,2,Online Auditing a Course,1,2020-03-01 03:39:36,7,"If, hypothetically, I wanted to learn python a...",0.105556,0.277778,UC Santa Cruz
3,3,Walking at night is scary as heck,1,2020-03-01 05:23:48,28,I was walking to porter from crown at 4:30 am ...,0.136111,0.561111,UC Santa Cruz
4,4,What are the odds classes are canceled on Monday?,1,2020-03-01 06:46:25,1,What are the odds classes are canceled on Monday?,0.0,0.0,UC Santa Cruz


In [13]:
#Load in UC Santa Barbara Data and Rename columns
ucsb = pd.read_csv('ucsbdata.csv')
ucsb.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
ucsb['University'] = 'UC Santa Barbara'
#ucsb['Body'] = reviewcleaner(ucsb['Body'], True, False)
ucsb.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,Is student health free?,1,2020-03-01 01:11:42,3,Does anyone know if making an appointment at t...,0.166667,0.366667,UC Santa Barbara
1,1,Sivakumar Math 6B HW,1,2020-03-01 02:02:15,0,Is it just me or are most of the problems on t...,-0.020833,0.641667,UC Santa Barbara
2,2,Anyone feel related to this?,1,2020-03-01 02:31:02,4,Anyone feel related to this?,0.0,0.4,UC Santa Barbara
3,3,Part time jobs in area,1,2020-03-01 10:24:37,3,Is anywhere in SB/IV/Goleta hiring rn?,0.0,0.0,UC Santa Barbara
4,4,People really hate hoprs huh? Found this while...,1,2020-03-01 11:15:33,13,People really hate hoprs huh? Found this while...,-0.8,0.9,UC Santa Barbara


In [14]:
#Load in UC Merced Data and Rename columns
ucm = pd.read_csv('merceddata.csv')
ucm.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
ucm['University'] = 'UC Merced'
#ucm['Body'] = reviewcleaner(ucm['Body'], True, False)
ucm.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,UCEAP Subreddit,1,2020-03-01 18:50:37,1,"\n\nHey guys,\nI just created a sub for UC stu...",0.0,0.0,UC Merced
1,1,Remember to vote in the primaries tomorrow!,1,2020-03-02 17:30:21,0,Remember to vote in the primaries tomorrow!,0.0,0.0,UC Merced
2,2,Career fair,1,2020-03-02 17:30:36,1,What are you guys wearing for the career fair?,0.7,0.9,UC Merced
3,3,Classifieds,1,2020-03-05 17:56:50,4,Anyone know how to be accepted into the facebo...,0.0,0.0,UC Merced
4,4,Improve student's understanding of C and data ...,1,2020-03-06 10:07:08,0,Improve student's understanding of C and data ...,0.0,0.0,UC Merced


In [15]:
#Load in Stanford Data and Rename columns
stanford = pd.read_csv('stanforddata.csv')
stanford.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
stanford['University'] = 'Stanford'
#stanford['Body'] = reviewcleaner(stanford['Body'], True, False)
stanford.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,"Hi, Need help writing a paper?",1,2020-03-01 20:32:59,0,"Hi, Need help writing a paper? I am here to he...",0.2375,0.6375,Stanford
1,1,Couldn’t be stanford,1,2020-03-02 03:01:45,0,Couldn’t be stanford,0.0,0.0,Stanford
2,2,App launch,1,2020-03-02 19:19:04,0,[removed],0.0,0.0,Stanford
3,3,Remember to vote in the primaries tomorrow!,1,2020-03-03 01:48:39,0,Remember to vote in the primaries tomorrow!,0.0,0.0,Stanford
4,4,HOT NEW APP,1,2020-03-03 03:00:23,1,https://apps.apple.com/us/app/campfire-be-your...,-0.714286,0.857143,Stanford


In [16]:
#Load in Yale Data and Rename columns
yale = pd.read_csv('yaledata.csv')
yale.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
#yale['Body'] = reviewcleaner(yale['Body'], True, False)
yale.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,Bruh,1,2020-03-01 21:47:53,0,Bruh,0.0,0.0,Yale
1,1,What kind of students are Yale looking for?,1,2020-03-02 04:07:08,5,"I want to be a better student, and my dream sc...",0.625,0.525,Yale
2,2,Social discord for mainly to socialize and mak...,1,2020-03-02 16:37:12,0,My bad if this was made but I have this discor...,0.014286,0.455952,Yale
3,3,Has anyone gone from staff to student?,1,2020-03-03 14:16:06,1,"Hope this is the correct sub to post, but I ha...",0.279167,0.579167,Yale
4,4,'Ban this technology': students protest US uni...,1,2020-03-03 16:14:48,0,'Ban this technology': students protest US uni...,0.0,0.0,Yale


In [17]:
#Load in Princeton Data and Rename columns
princeton = pd.read_csv('princetondata.csv')
princeton.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
#princeton['Body'] = reviewcleaner(princeton['Body'], True, False)
princeton.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,Bruhhh,1,2020-03-02 03:01:02,1,Bruhhh,0.0,0.0,Princeton
1,1,Whoever stole my sweaters,1,2020-03-04 21:01:15,0,This is for whichever asshole(s) stole two of ...,0.5,1.0,Princeton
2,2,Princeton Team and Merck scientists create a n...,1,2020-03-05 23:07:04,0,Princeton Team and Merck scientists create a n...,0.268182,0.577273,Princeton
3,3,Smoking weed at Princeton? Serious?!,1,2020-03-06 12:58:33,0,"Are you serious, guys? Late at night when I ta...",-0.012202,0.46131,Princeton
4,4,Smoking weed at PRINCETON? This really has to ...,1,2020-03-06 13:00:27,8,"Are you serious, guys? Late at night when I ta...",-0.012202,0.46131,Princeton


In [18]:
#Load in Cornell Data and Rename columns
cornell = pd.read_csv('cornelldata.csv')
cornell.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
cornell['University'] = 'Cornell'
#cornell['Body'] = reviewcleaner(cornell['Body'], True, False)
cornell.head()

Unnamed: 0.1,Unnamed: 0,Post ID,Title,Url,Author,Score,Publish Date,Total No. of Comments,Permalink,Flair,Body,Polarity,Subjectivity,University
0,0,fbvlvd,Anyone hate INFO 1200,https://www.reddit.com/r/Cornell/comments/fbvl...,Coconut_coke,1,2020-03-01 07:34:08,11,/r/Cornell/comments/fbvlvd/anyone_hate_info_1200/,,I always wonder who designed the courses for i...,-0.269792,0.388542,Cornell
1,1,fbwbff,Semester 1 vs semester 2,https://www.reddit.com/r/Cornell/comments/fbwb...,ellipsoidwhatdo,1,2020-03-01 08:22:54,10,/r/Cornell/comments/fbwbff/semester_1_vs_semes...,,Freshman fall semester was better than this bs...,0.5,0.5,Cornell
2,2,fbwj23,Guys with loud cars in ct,https://www.reddit.com/r/Cornell/comments/fbwj...,deadpoetic123,1,2020-03-01 08:37:00,10,/r/Cornell/comments/fbwj23/guys_with_loud_cars...,,"We get it you have a micropenis, no need to an...",0.0,0.625,Cornell
3,3,fbwlwo,For the longest time I thought ZOZI was some frat,https://www.reddit.com/r/Cornell/comments/fbwl...,ShoddyProgrammer550,1,2020-03-01 08:42:32,11,/r/Cornell/comments/fbwlwo/for_the_longest_tim...,,Like Zeta Theta Zeta Iota. Apparently it's jus...,0.05,0.35,Cornell
4,4,fbxb90,Does anyone understand what is going on in eco...,https://www.reddit.com/r/Cornell/comments/fbxb...,wondershrimp,1,2020-03-01 09:28:34,2,/r/Cornell/comments/fbxb90/does_anyone_underst...,,please help we have a test on wednesday I go t...,0.0,0.0,Cornell


In [19]:
#Load in MIT Data and Rename columns
mit = pd.read_csv('mitdata.csv')
mit.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
mit['University'] = 'MIT'
#mit['Body'] = reviewcleaner(mit['Body'], True, False)
mit.head()

Unnamed: 0.1,Unnamed: 0,Title,Score,Publish Date,Total No. of Comments,Body,Polarity,Subjectivity,University
0,0,work program for electrical engineering,1,2020-03-01 20:19:30,2,"Friends, how can I see the work program for el...",0.0,0.0,MIT
1,1,Oh come on...,1,2020-03-02 03:28:16,0,Oh come on...,0.0,0.0,MIT
2,2,Have I missed too much material to add 6.036?,1,2020-03-02 04:38:42,5,Is it too late into the semester to get caught...,0.127778,0.777778,MIT
3,3,What are some (social) MIT student clubs that ...,1,2020-03-02 20:38:43,3,"Ex: if you are an alumni, especially if you wo...",-0.0625,0.6875,MIT
4,4,Gaming Clubs?,1,2020-03-02 23:17:56,2,Hi guys! I am planning to apply to MIT and I h...,0.045,0.405,MIT


In [20]:
#Load in Harvard Data and Rename columns
harvard = pd.read_csv('harvarddata.csv')
harvard.rename(columns = {'polarity': 'Polarity', 'subjectivity': 'Subjectivity','school': 'University'}, inplace = True)
harvard['University'] = 'Harvard'
#harvard['Body'] = reviewcleaner(harvard['Body'], True, False)
harvard.head()

Unnamed: 0,Post ID,Title,Url,Author,Score,Publish Date,Total No. of Comments,Permalink,Flair,Body,University
0,fbu57a,Does commencement happen as usual?,https://www.reddit.com/r/Harvard/comments/fbu5...,JimTheBro,1,2020-03-01 05:37:37,5,/r/Harvard/comments/fbu57a/does_commencement_h...,,Seems like it's going to be a bad idea to pack...,Harvard
1,fbwhh7,Smoking weed on campus? C’mon everyone.,https://www.reddit.com/r/Harvard/comments/fbwh...,h1ss9,1,2020-03-01 08:34:03,13,/r/Harvard/comments/fbwhh7/smoking_weed_on_cam...,,[removed],Harvard
2,fc47ef,Acceptance,https://www.reddit.com/r/Harvard/comments/fc47...,ConnorK500,1,2020-03-01 17:18:12,0,/r/Harvard/comments/fc47ef/acceptance/,,[removed],Harvard
3,fc5fkw,Harvard vs other HYPSM?,https://www.reddit.com/r/Harvard/comments/fc5f...,Plus-Coast,1,2020-03-01 18:52:42,9,/r/Harvard/comments/fc5fkw/harvard_vs_other_hy...,,Was wondering what motivated you guys to pick ...,Harvard
4,fc5hnv,Couldn’t buy his way into Harvard tho,https://youtu.be/nfKLI8cklGs,Quasars_Rotate,1,2020-03-01 18:57:11,0,/r/Harvard/comments/fc5hnv/couldnt_buy_his_way...,,,Harvard


In [21]:
#Load in NYU Data and Rename columns
nyu = pd.read_csv('nyu.csv')
nyu = fillnaf(nyu)
#nyu['Body'] = reviewcleaner(nyu['Body'], True, False)

In [22]:
#Calculate polarity scores bc data doesn't include polarities
polscores = []
subscores = []
for i in range(0,9064):
    score = TextBlob(nyu["Body"][i]).polarity
    sub = TextBlob(nyu['Body'][i]).subjectivity
    polscores.append(score)
    subscores.append(sub)
nyu["Polarity"] = polscores
nyu['Subjectivity'] = subscores
nyu['University'] = 'NYU'
nyu

Unnamed: 0,Post ID,Title,Url,Author,Score,Publish Date,Total No. of Comments,Permalink,Flair,Body,Polarity,Subjectivity,University
0,fbuiwy,Advice regarding themed housing,https://www.reddit.com/r/nyu/comments/fbuiwy/a...,QuantumAsterix,1,2020-03-01 06:09:59,3,/r/nyu/comments/fbuiwy/advice_regarding_themed...,Advice,I am an incoming freshman currently applying f...,0.285490,0.576923,NYU
1,fbvzik,Any freshman that wants to room with me next y...,https://www.reddit.com/r/nyu/comments/fbvzik/a...,SolveMrReece,1,2020-03-01 08:00:51,0,/r/nyu/comments/fbvzik/any_freshman_that_wants...,,I'm (Female) going to be a senior next year an...,0.072173,0.284524,NYU
2,fbwbvi,how strict is the dorm guest policy?,https://www.reddit.com/r/nyu/comments/fbwbvi/h...,babypepper01,1,2020-03-01 08:23:46,6,/r/nyu/comments/fbwbvi/how_strict_is_the_dorm_...,,do the guards actually check to see how many t...,0.333333,0.366667,NYU
3,fbwowo,*Residence hall advice*,https://www.reddit.com/r/nyu/comments/fbwowo/r...,slothhandles166,1,2020-03-01 08:48:07,9,/r/nyu/comments/fbwowo/residence_hall_advice/,Advice,Hey! Incoming freshman here. I definitely need...,-0.132292,0.500000,NYU
4,fbwq1a,I’m confused,https://www.reddit.com/r/nyu/comments/fbwq1a/i...,wastingmysummer,1,2020-03-01 08:50:18,11,/r/nyu/comments/fbwq1a/im_confused/,Advice,Going to be a sophomore next year and I’m conf...,-0.037500,0.475000,NYU
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9059,l9tvwz,Chem textbook,https://www.reddit.com/r/nyu/comments/l9tvwz/c...,Herefortextbook,1,2021-01-31 19:41:26,2,/r/nyu/comments/l9tvwz/chem_textbook/,,[removed],0.000000,0.000000,NYU
9060,l9u16g,Digital Access Codes from Bookstore?,https://www.reddit.com/r/nyu/comments/l9u16g/d...,Charming-Fox7763,1,2021-01-31 19:49:20,2,/r/nyu/comments/l9u16g/digital_access_codes_fr...,Academics,I ordered two digital rented textbooks from th...,-0.100000,0.316667,NYU
9061,l9vzr3,Guest Spring 2021,https://www.reddit.com/r/nyu/comments/l9vzr3/g...,[deleted],1,2021-01-31 21:40:27,2,/r/nyu/comments/l9vzr3/guest_spring_2021/,,Guest Spring 2021,0.000000,0.000000,NYU
9062,l9w179,When will classes be posted in nyu class?,https://www.reddit.com/r/nyu/comments/l9w179/w...,Signal_Outside,2,2021-01-31 21:42:44,2,/r/nyu/comments/l9w179/when_will_classes_be_po...,,So I have a class starts tmrw and I still didn...,0.000000,0.000000,NYU


In [23]:
#Combine all dataframes to a single frame to get one csv for dashboard
frames = [berk, ucla, ucsd, ucm, ucsc, ucsb, riverside, davis, uci, stanford, mit, yale, princeton, cornell, harvard, nyu]
alluniversitypolarity = pd.concat(frames, ignore_index=True)

In [24]:
#Drop unnecessary columns to make dataframe more clean
alluniversitypolarity.drop(columns=['Post ID', 'Url', 'Author','Permalink','Flair', 'Unnamed: 0'], inplace = True)
alluniversitypolarity.drop(columns = ['Title','Score'], inplace=True)
alluniversitypolarity.drop(columns = ['Body'], inplace=True)

In [25]:
#Convert dataframe to one csv
alluniversitypolarity.to_csv('allunipolarity.csv')