In [1]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State
from dash import dash_table
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import dash_cytoscape as cyto
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import networkx as nx
import plotly.express as px
from list_features import feature_properties, subreddits_to_keep, cols_to_keep
import base64

In [2]:
def processing_features(data):
    '''
    This function processing some features will use in dash
    '''
    data['YEAR'] = data['TIMESTAMP'].apply(lambda x: x.split('-')[0])
    data['LINK_SENTIMENT'] = data['LINK_SENTIMENT'].apply(lambda x: 'Neutral or Positive' if x==1 else 'Negative')


    data = (data.query('SOURCE_SUBREDDIT in @subreddits_to_keep or TARGET_SUBREDDIT in @subreddits_to_keep')
    .assign(
    prop=lambda x: x.PROPERTIES.str.split(','),
    directed_sentiment=lambda x: x.prop.apply(lambda y: float(y[20])),    
  ))


    data['undirected_edge'] = data.apply(
        lambda x: str(sorted([x.SOURCE_SUBREDDIT, x.TARGET_SUBREDDIT])),
        axis=1)

    mean_sentiment_df = (
      data.groupby('undirected_edge')
      .mean('directed_sentiment')
      .rename(columns={'directed_sentiment': 'sentiment'})
    )

    # Join mean_sentiment_df with data
    data = pd.merge(
      data,
      mean_sentiment_df,
      on='undirected_edge',
      how='left',
    )
    
    return data

def expand_data(data):
    '''
    This function expand data in more columns
    '''
    data_expand = data.PROPERTIES.str.split(",",expand=True)
    data_expand.columns = feature_properties
    data_features = pd.concat([data.drop('PROPERTIES', axis=1), data_expand], axis=1)
    data_features['directed_sentiment'] = data_features['Compound sentiment calculated by VADER']
    
    return data_features

def filter_cases_show(network_df, SUBREDDIT=False, SENTIMENT_LINK=False):
    '''
    This function prepare data will use in dash, input is the filter to apply in the dash and 
    the output is data filtered and options of filter.
    '''
    if SUBREDDIT:
        network_df = network_df.query('SOURCE_SUBREDDIT in @SUBREDDIT or TARGET_SUBREDDIT in @SUBREDDIT')

    if SENTIMENT_LINK:
        network_df = network_df.query('LINK_SENTIMENT in @SENTIMENT_LINK')

    return network_df

In [3]:
subreddits_to_keep

['bitcoin',
 'btc',
 'cryptocurrency',
 'crypto',
 'ethereum',
 'eth',
 'ethtrader',
 'litecoin',
 'monero',
 'ripple',
 'zcash',
 'cryptomarkets',
 'bitcoinmarkets',
 'altcoin',
 'icocrypto',
 'cardano',
 'bitcoinbeginners',
 'binance',
 'bitcoincash',
 'tether',
 'stratisplatform',
 'decred',
 'dash',
 'dogecoin',
 'litecointraders',
 'bitcoinuk',
 'nem',
 'stellar',
 'iota',
 'dashpay',
 'neo',
 'ethereumclassic',
 'eos',
 'lisk',
 'nanocurrency',
 'tron',
 'verge',
 'tronix',
 'coinbase',
 'defi',
 'omise_go',
 'satoshistreetBets',
 'bitcoin_com',
 'bitcoin_de',
 'bitcoin_fr',
 'bitcoin_it',
 'bitcoin_jp',
 'bitcoin_ru',
 'bitcoin_es',
 'bitcoin_uk',
 'bitcoin_us',
 'bitcoin_world',
 'bitcoin_ch',
 'bitcoin_ca',
 'bitcoin_nl',
 'bitcoinbeginners',
 'cryptotechnology',
 'blockchain',
 'crypto_currency',
 'crypto_coins',
 'crypto_currency_news']

In [4]:
#### Read the dataset and processing features will show in dash
df_title = pd.read_csv('data_title.tsv',sep='\t').loc[:, cols_to_keep]
df_body = pd.read_csv('data_body.tsv',sep='\t').loc[:, cols_to_keep]
network_df = pd.concat([df_title, df_body], axis=0).reset_index()
network_df = processing_features(network_df)
network_df = expand_data(network_df)

hhhhhhh
undirected_edge
['100yearsago', 'bitcoin']      -0.8176
['1984isreality', 'bitcoin']    -0.2732
['1broker', 'bitcoinmarkets']   -0.6136
['1coinpool', 'dogecoin']        0.0000
['2014', 'dogecoin']             0.4199
                                  ...  
['ripple', 'virtacoin']          0.9899
['ripple', 'xrptrader']          0.0000
['ripplers', 'stellar']          0.0000
['stellar', 'thestellar']        0.0000
['tron', 'warhammer40k']         0.0000
Name: sentiment, Length: 3608, dtype: float64


In [7]:
network_df.shape

(14448, 98)

In [8]:
filtered_network_df = (
  network_df
  .query('SOURCE_SUBREDDIT in @subreddits_to_keep and TARGET_SUBREDDIT in @subreddits_to_keep')
)

In [12]:
# save df as csv
filtered_network_df.to_csv('filtered_and_treated_dataset.csv', index=False)

In [13]:
(
  pd.read_csv('filtered_and_treated_dataset.csv')
)

Unnamed: 0,index_x,SOURCE_SUBREDDIT,TARGET_SUBREDDIT,POST_ID,TIMESTAMP,LINK_SENTIMENT,YEAR,prop,directed_sentiment,undirected_edge,...,LIWC_Achiev,LIWC_Leisure,LIWC_Home,LIWC_Money,LIWC_Relig,LIWC_Death,LIWC_Assent,LIWC_Dissent,LIWC_Nonflu,LIWC_Filler
0,270,cryptocurrency,bitcoin,1u8bw5s,2014-01-02 06:05:31,Neutral or Positive,2014,"['89.0', '76.0', '0.797752808989', '0.0', '0.1...",0.0000,"['bitcoin', 'cryptocurrency']",...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
1,875,litecoin,bitcoin,1uerggs,2014-01-04 12:02:35,Neutral or Positive,2014,"['129.0', '109.0', '0.77519379845', '0.0', '0....",0.4404,"['bitcoin', 'litecoin']",...,0.000000,0.000000,0.000000,0.041667,0.0,0.000000,0.0,0.000000,0.000000,0.000000
2,1050,coinbase,bitcoin,1uh1sss,2014-01-05 13:01:21,Neutral or Positive,2014,"['71.0', '65.0', '0.87323943662', '0.0', '0.09...",0.0000,"['bitcoin', 'coinbase']",...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
3,1531,coinbase,bitcoin,1ult8os,2014-01-06 14:39:15,Neutral or Positive,2014,"['98.0', '86.0', '0.765306122449', '0.0', '0.1...",-0.4767,"['bitcoin', 'coinbase']",...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000
4,1557,dogecoin,bitcoin,1um1yos,2014-01-07 00:38:01,Neutral or Positive,2014,"['97.0', '82.0', '0.814432989691', '0.0', '0.1...",0.0000,"['bitcoin', 'dogecoin']",...,0.055556,0.000000,0.000000,0.055556,0.0,0.055556,0.0,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2235,286268,ethereum,ethtrader,68b9wfs,2017-04-29 12:43:40,Neutral or Positive,2017,"['458.0', '400.0', '0.805676855895', '0.006550...",0.9081,"['ethereum', 'ethtrader']",...,0.028571,0.000000,0.000000,0.042857,0.0,0.000000,0.0,0.000000,0.014286,0.000000
2236,286302,ethtrader,btc,68ccems,2017-04-29 16:16:04,Neutral or Positive,2017,"['994.0', '860.0', '0.775653923541', '0.009054...",0.8986,"['btc', 'ethtrader']",...,0.012346,0.000000,0.000000,0.018519,0.0,0.006173,0.0,0.006173,0.000000,0.006173
2237,286431,ethereum,litecoin,68g3d3s,2017-04-30 08:45:55,Neutral or Positive,2017,"['447.0', '396.0', '0.805369127517', '0.008948...",0.0000,"['ethereum', 'litecoin']",...,0.000000,0.014706,0.000000,0.073529,0.0,0.000000,0.0,0.000000,0.000000,0.000000
2238,286542,ethereum,cryptocurrency,68ia9es,2017-04-30 15:30:32,Neutral or Positive,2017,"['674.0', '572.0', '0.783382789318', '0.004451...",0.9914,"['cryptocurrency', 'ethereum']",...,0.034783,0.000000,0.000000,0.069565,0.0,0.008696,0.0,0.008696,0.000000,0.000000
