# Reddit r/NFT Sentiment Data

In [351]:
#
import os
import numpy as np
import pandas as pd
from pandas import Timestamp
import seaborn as sns
import matplotlib.pyplot as plt
#
import datetime
from dateutil.relativedelta import relativedelta
import requests
import praw
from psaw import PushshiftAPI
#
import nltk
import re
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
import emoji

Authentication (using PRAW together with PSAW, since PRAW alone returns at most 1,000 results per listing)

In [75]:
# Read the Reddit app credentials from the local KEY file:
# line 1 is the client id, line 2 is the client secret.
with open("KEY", "r") as file:
    key_lines = file.read().splitlines()
CLIENT_ID, CLIENT_SECRET = key_lines

reddit = praw.Reddit(
    user_agent="NFT_Sent:v1.0 (by u/Sharp_Source3201)",
    redirect_uri="http://localhost:8080",
    client_secret=CLIENT_SECRET,
    client_id=CLIENT_ID,
)

# Print the authorization URL (scope "identity", state "...", permanent duration).
auth_url = reddit.auth.url(["identity"], "...", "permanent")
print(auth_url)

# Hand the PRAW client to PSAW; `api` is what the search cell uses.
api = PushshiftAPI(reddit)

https://www.reddit.com/api/v1/authorize?client_id=PwwNhIZjaOAKPHnD15vVsg&duration=permanent&redirect_uri=http%3A%2F%2Flocalhost%3A8080&response_type=code&scope=identity&state=...


Post parse function

In [242]:
def get_features(submission):
    """Extract the posting date, title and body text from a submission.

    Parameters
    ----------
    submission : object
        Any object exposing ``created`` (POSIX timestamp in seconds),
        ``title`` and ``selftext`` attributes.

    Returns
    -------
    tuple
        ``(date, title, main_text)`` where ``date`` is a ``datetime.date``
        and the other two values are returned unchanged.
    """
    # Convert in UTC explicitly: without ``tz`` the conversion uses the local
    # timezone of the machine running the notebook, so the same post could be
    # assigned to different calendar days on different machines.
    # NOTE(review): PRAW also exposes ``created_utc`` - confirm whether that
    # attribute should be preferred over ``created``.
    created_at = datetime.datetime.fromtimestamp(
        submission.created, tz=datetime.timezone.utc
    )
    return created_at.date(), submission.title, submission.selftext

Search the subreddit's submissions through PSAW (returns Submission objects), segmented by time window (~2.5 hours total)

In [277]:
%%time
#start_epoch=int(datetime.datetime(2017, 10, 2).timestamp())
start_epoch=int(datetime.datetime(2022, 4, 17).timestamp())
end_epoch=int(datetime.datetime(2022, 4, 18).timestamp())
#end_epoch=int(datetime.datetime(2022, 4, 17).timestamp())

query = "NOT title:giveaway"

request_results = list(api.search_submissions(#q=query,
                            after=start_epoch,
                            before=end_epoch,
                            subreddit="NFT",
                            filter=['title', 'selftext'],
                            ))

#subreddit = reddit.subreddit("NFT")

CPU times: total: 1.09 s
Wall time: 5.71 s


In [278]:
# Number of submissions collected in `request_results`.
len(request_results)

453

In [260]:
# One (date, title, selftext) tuple per fetched submission, then a single
# DataFrame construction from the collected rows.
feature_rows = list(map(get_features, request_results))
df = pd.DataFrame(feature_rows, columns=["Date", "Title", "Selftext"])

## Cleaning Main Data

In [293]:
# Load the collected submissions from disk and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="../data/signals/Reddit_NFT_Text.csv")
df.head(5)

Unnamed: 0.1,Unnamed: 0,Date,Title,Selftext
0,0,2020-12-01,Robin Williams LU-ME Lamp,Just dropped my first NFT on [@rariblecom](htt...
1,1,2020-12-01,How to create your first NFT in 1 minute,
2,2,2020-12-01,"The Six Dragons: PS5, DeFi, Yield Farming",
3,3,2020-12-01,A new NFT with a low supply coin and NFT staki...,
4,4,2020-12-01,Read more about the first mind-controlled NFT ...,


Turn date column into datetime and sort

In [294]:
# Parse the Date column into datetime values (returns a new frame rather than
# assigning into the existing one).
df = df.assign(Date=pd.to_datetime(df["Date"]))

In [308]:
# Sort chronologically, then remove leftover index columns by name.
# pandas names a header-less CSV column "Unnamed: N"; dropping by that name
# (instead of by position 0) keeps this cell safe to re-run: a second run
# no longer drops the Date column.
df = df.sort_values("Date")
df = df.loc[:, ~df.columns.str.startswith("Unnamed")]
df.head()

Unnamed: 0,Date,Title,Selftext
459,2019-02-28,A NonFungible Token Stampede Is Coming – Coinm...,
458,2019-02-28,Decentraland Creator Contest - Building on NFT...,
457,2019-02-28,NFTY News,
456,2019-02-28,‘No Wallet Needed’: Mobile Cryptopunk Game,
454,2019-07-06,Yat Siu - NFT is why content will be king again,


In [310]:
# Use the dates as a DatetimeIndex; the Date column itself is still present
# afterwards because an Index object (not a column label) is passed.
df = df.set_index(pd.DatetimeIndex(df["Date"]))
df.head()

Unnamed: 0_level_0,Date,Title,Selftext
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-28,2019-02-28,A NonFungible Token Stampede Is Coming – Coinm...,
2019-02-28,2019-02-28,Decentraland Creator Contest - Building on NFT...,
2019-02-28,2019-02-28,NFTY News,
2019-02-28,2019-02-28,‘No Wallet Needed’: Mobile Cryptopunk Game,
2019-07-06,2019-07-06,Yat Siu - NFT is why content will be king again,


In [311]:
# Remove the Date column from the frame's columns.
df = df.drop(columns=["Date"])
df.head()

Unnamed: 0_level_0,Title,Selftext
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-28,A NonFungible Token Stampede Is Coming – Coinm...,
2019-02-28,Decentraland Creator Contest - Building on NFT...,
2019-02-28,NFTY News,
2019-02-28,‘No Wallet Needed’: Mobile Cryptopunk Game,
2019-07-06,Yat Siu - NFT is why content will be king again,


Output

In [312]:
# Write the frame, including its index, to the same CSV path the data was
# read from; this overwrites that file.
df.to_csv("../data/signals/Reddit_NFT_Text.csv", index=True)

## Applying Sentiment Analysis

In [315]:
# Reload the text data from CSV for the sentiment step and preview it.
df = pd.read_csv(filepath_or_buffer="../data/signals/Reddit_NFT_Text.csv")
df.head(5)

Unnamed: 0,Date,Title,Selftext
0,2019-02-28,A NonFungible Token Stampede Is Coming – Coinm...,
1,2019-02-28,Decentraland Creator Contest - Building on NFT...,
2,2019-02-28,NFTY News,
3,2019-02-28,‘No Wallet Needed’: Mobile Cryptopunk Game,
4,2019-07-06,Yat Siu - NFT is why content will be king again,


Dealing with emojis in text: sentiment data is not available for all of them, so just convert them to their text names (demojize).

In [371]:
# Demojize one sample title. Row 267717 is a hard-coded position
# (presumably a title that contains emoji - TODO confirm).
test_string = df["Title"].iloc[267717]
# Use spaces as delimiters so each emoji name stays a separate word; with
# empty delimiters the name is glued onto neighbouring text
# (e.g. "love<rocket>NFT" -> "loverocketNFT"). Underscores are then turned
# into spaces so multi-word emoji names read as plain words.
test_string = emoji.demojize(test_string, delimiters=(" ", " ")).replace("_", " ")

spaCy pipeline with the spacytextblob sentiment component

In [380]:
# Build the English pipeline and append the spacytextblob component.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("spacytextblob")

# Run the pipeline on a single title and show the polarity exposed through
# the `blob` extension.
sample_title = df["Title"][4]
doc = nlp(sample_title)
doc._.blob.polarity

Sentiment(polarity=0.0, subjectivity=0.0, assessments=[])

In [374]:
# Show the title stored under label 4 (the same lookup the spaCy cell uses).
df["Title"][4]

'Yat Siu - NFT is why content will be king again'

[]