<h1>Depression Detection in Social Media Text using Convolutional Neural Network</h1>
<b> By Ezra Abah</b>

In [1]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.stem import PorterStemmer 
from nltk.tokenize import word_tokenize
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from textblob import Word

In [2]:
data=pd.read_csv("C:/Users/Admin/Documents/Datasets/tweets_16million.csv", encoding = "ISO-8859-1", names=["target","id","date","flag","user","text"])

In [3]:
data.head()

Unnamed: 0,target,id,date,flag,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."


In [4]:
#Observe size of dataset
shape = data.shape
print("The size of this data set is "+ str(shape))
data.groupby('target').count()

The size of this data set is (1600000, 6)


Unnamed: 0_level_0,id,date,flag,user,text
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,800000,800000,800000,800000,800000
4,800000,800000,800000,800000,800000


In [5]:
#view datatypes of individual columns

types = data.dtypes
print(types)

target     int64
id         int64
date      object
flag      object
user      object
text      object
dtype: object


In [6]:
data['text'].head()

0    @switchfoot http://twitpic.com/2y1zl - Awww, t...
1    is upset that he can't update his Facebook by ...
2    @Kenichan I dived many times for the ball. Man...
3      my whole body feels itchy and like its on fire 
4    @nationwideclass no, it's not behaving at all....
Name: text, dtype: object

In [7]:
# Check for empty cells
data.isnull().sum()

target    0
id        0
date      0
flag      0
user      0
text      0
dtype: int64

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0   target  1600000 non-null  int64 
 1   id      1600000 non-null  int64 
 2   date    1600000 non-null  object
 3   flag    1600000 non-null  object
 4   user    1600000 non-null  object
 5   text    1600000 non-null  object
dtypes: int64(2), object(4)
memory usage: 73.2+ MB


In [10]:
# Remove_username
def remove_usernames(text):
    new_text=text.apply(lambda x: re.sub('@[\w]+','', str(x)))
    return new_text
data['text']=remove_usernames(data['text'])

In [12]:
def remove_hyperlinks(text):
    """Remove punctuation from text"""
    new_text = text.apply(lambda x: re.sub(r'https?:\/\/(www\.)?[-a-zA-Z0–9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0–9@:%_\+.~#?&//=]*)', '', str(x), flags=re.MULTILINE))
    new_text = text.apply(lambda x: re.sub(r'[-a-zA-Z0–9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0–9@:%_\+.~#?&//=]*)', '', str(x), flags=re.MULTILINE))
    return new_text
data['text']=remove_hyperlinks(data['text'])
data['text'].head

<bound method NDFrame.head of 0           httptwitpiccom2y1zl  Awww thats a bummer  You...
1          is upset that he cant update his Facebook by t...
2           I dived many times for the ball Managed to sa...
3            my whole body feels itchy and like its on fire 
4           no its not behaving at all im mad why am i he...
                                 ...                        
1599995    Just woke up Having no school is the best feel...
1599996    TheWDBcom  Very cool to hear old Walt intervie...
1599997    Are you ready for your MoJo Makeover Ask me fo...
1599998    Happy 38th Birthday to my boo of alll time Tup...
1599999                             happy charitytuesday    
Name: text, Length: 1600000, dtype: object>

In [11]:
def remove_punctuation(text):
    """Remove punctuation from text"""
    new_text=text.str.replace('[^\w\s]','')
    return new_text
data['text']=remove_punctuation(data['text'])
data['text'].head

<bound method NDFrame.head of 0           httptwitpiccom2y1zl  Awww thats a bummer  You...
1          is upset that he cant update his Facebook by t...
2           I dived many times for the ball Managed to sa...
3            my whole body feels itchy and like its on fire 
4           no its not behaving at all im mad why am i he...
                                 ...                        
1599995    Just woke up Having no school is the best feel...
1599996    TheWDBcom  Very cool to hear old Walt intervie...
1599997    Are you ready for your MoJo Makeover Ask me fo...
1599998    Happy 38th Birthday to my boo of alll time Tup...
1599999                             happy charitytuesday    
Name: text, Length: 1600000, dtype: object>

In [13]:
def remove_stopwords(text):
    """Remove stopwords from text"""
    nltk.download('stopwords')
    stop = stopwords.words('english')
    new_text= text.apply(lambda x: " ".join(x for x in x.split() if x not in stop))
    return new_text
data['text']=remove_stopwords(data['text'])
data['text'].head

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


<bound method NDFrame.head of 0          httptwitpiccom2y1zl Awww thats bummer You shou...
1          upset cant update Facebook texting might cry r...
2          I dived many times ball Managed save 50 The re...
3                           whole body feels itchy like fire
4                                 behaving im mad I cant see
                                 ...                        
1599995            Just woke Having school best feeling ever
1599996    TheWDBcom Very cool hear old Walt interviews â...
1599997                  Are ready MoJo Makeover Ask details
1599998    Happy 38th Birthday boo alll time Tupac Amaru ...
1599999                                 happy charitytuesday
Name: text, Length: 1600000, dtype: object>

In [14]:
def remove_digits(text):
    """Removes digit present in text"""
    new_text=text.apply(lambda x: re.sub(r"\d",'', str(x)))
    return new_text
data['text']=remove_digits(data['text'])
data['text'].head

<bound method NDFrame.head of 0          httptwitpiccomyzl Awww thats bummer You should...
1          upset cant update Facebook texting might cry r...
2          I dived many times ball Managed save  The rest...
3                           whole body feels itchy like fire
4                                 behaving im mad I cant see
                                 ...                        
1599995            Just woke Having school best feeling ever
1599996    TheWDBcom Very cool hear old Walt interviews â...
1599997                  Are ready MoJo Makeover Ask details
1599998    Happy th Birthday boo alll time Tupac Amaru Sh...
1599999                                 happy charitytuesday
Name: text, Length: 1600000, dtype: object>

In [15]:
def lower_case(text):
    """changes every text to lower case"""
    new_text = text.apply(lambda x: " ".join(x.lower() for x in x.split()))
    return new_text
data['text']=lower_case(data['text'])
data['text'].head

<bound method NDFrame.head of 0          httptwitpiccomyzl awww thats bummer you should...
1          upset cant update facebook texting might cry r...
2          i dived many times ball managed save the rest ...
3                           whole body feels itchy like fire
4                                 behaving im mad i cant see
                                 ...                        
1599995            just woke having school best feeling ever
1599996    thewdbcom very cool hear old walt interviews â...
1599997                  are ready mojo makeover ask details
1599998    happy th birthday boo alll time tupac amaru sh...
1599999                                 happy charitytuesday
Name: text, Length: 1600000, dtype: object>

In [16]:
def lemmatize(text):
    """Remove punctuation from text"""
    new_text=text.apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
    return new_text
data['text']=lemmatize(data['text'])
data['text'].head

<bound method NDFrame.head of 0          httptwitpiccomyzl awww thats bummer you should...
1          upset cant update facebook texting might cry r...
2          i dived many time ball managed save the rest g...
3                            whole body feel itchy like fire
4                                 behaving im mad i cant see
                                 ...                        
1599995            just woke having school best feeling ever
1599996    thewdbcom very cool hear old walt interview â ...
1599997                   are ready mojo makeover ask detail
1599998    happy th birthday boo alll time tupac amaru sh...
1599999                                 happy charitytuesday
Name: text, Length: 1600000, dtype: object>

In [None]:
# Stemmatize
#def word_stemmer(text):
#    stem_text = [PorterStemmer().stem(i) for i in text]
#    return stem_text
#data['text'] = data['text'].apply(lambda x: word_stemmer(x))
#data.head()


#stem_text = PorterStemmer()
#data['text']=data['text'].apply(lambda x: " ".join([stem_text.stem(word) for word in x.split()]))

## Exploratory Data Analysis