## Preparation

**Only uncomment and run the code block below if this is your first time running this notebook**

In [0]:
# import sys
#
# !pip install pyarrow pandas arrow swifter plotly gensim spacy openpyxl pyecharts
# !{sys.executable} -m spacy download en_core_web_lg

In [6]:
from lib.utils import write_html
from pyecharts.globals import CurrentConfig, NotebookType

CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

## 1. Load raw tweet dataset

In [3]:
import pyarrow.feather as ft
data_raw = ft.read_feather('data/raw/raw.feather')
print(f'Total tweets: {data_raw.shape[0]}')
print(f'English tweets: {data_raw[data_raw.lang == "en"].shape[0]} [{data_raw[data_raw.lang == "en"].shape[0] / data_raw.shape[0]}]')

Total tweets: 1315836
English tweets: 1119015 [0.850421329101803]


## 2. Integrate supplementary retrieval

In [17]:
import pyarrow.feather as pf
import arrow
import pandas
import swifter
import plotly.express as px

data_tweetnlp = pf.read_feather('data/raw/en_sent_label_tweetnlp.feather')
data_tweetnlp.created_at = data_tweetnlp.created_at.swifter.apply(
            lambda x: arrow.get(x).datetime.replace(tzinfo=None)
        )
data_tweetnlp.index = data_tweetnlp.created_at
data_tweetnlp = data_tweetnlp.drop(columns=["created_at"])
data_tweetnlp = data_tweetnlp.groupby(['tweetnlp_sentiment_label', pandas.Grouper(freq='M')]).count()
data_tweetnlp = data_tweetnlp.reset_index()
fig = px.line(data_tweetnlp, x='created_at', y='status_id', color='tweetnlp_sentiment_label', labels={'tweetnlp_sentiment_label': '<b>Sentiment</b>', 'status_id': '<b>Number of tweets</b>', 'created_at': '<b>Time</b>'}, template='plotly_white')
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
write_html(fig, 'en-sentiment-with-tweetnlp')
fig

Dask Apply:   0%|          | 0/48 [00:00<?, ?it/s]

In [8]:
data_tweetnlp.groupby(by=['tweetnlp_sentiment_label']).sum()['status_id']

tweetnlp_sentiment_label
negative    238457
neutral     621257
positive    259301
Name: status_id, dtype: int64

In [9]:
total = 238457+621257+259301
print(238457/total*100)
print(621257/total*100)
print(259301/total*100)

21.309544554809364
55.51820127522866
23.172254169961974


In [2]:
import pyarrow.feather as pf
from gensim.models import LdaModel
from lib.dtc import get_topics, nlp_preprocess
import spacy
import pandas
import swifter
from tqdm.notebook import tqdm
tqdm.pandas()

spacy_nlp = spacy.load('en_core_web_lg')
lda_model = LdaModel.load('data/models/n8/n8.model')
data_tweetnlp = pf.read_feather('data/raw/en_sent_label_tweetnlp.feather')
corpus, dictionary, _ = nlp_preprocess(
                data_tweetnlp["text"].tolist(), spacy_nlp.Defaults.stop_words
            )
data_tweetnlp[
            [
                "topic",
                "topic1",
                "topic2",
                "topic3",
                "topic4",
                "topic5",
                "topic6",
                "topic7",
                "topic8",
            ]
        ] = data_tweetnlp.text.progress_apply(
            lambda t: get_topics(
                t, lda_model, dictionary, spacy_nlp.Defaults.stop_words
            )
        )
data_tweetnlp.to_feather('data/processed/en_sent_label_tweetnlp_topic.feather')

  0%|          | 0/1119015 [00:00<?, ?it/s]

In [7]:
import pyarrow.feather as pf
import arrow
data_tweetnlp = pf.read_feather('data/processed/en_sent_label_tweetnlp_topic.feather')
data_tweetnlp.created_at = data_tweetnlp.created_at.swifter.apply(
            lambda x: arrow.get(x).datetime.replace(tzinfo=None)
        )
data_tweetnlp.index = data_tweetnlp.created_at
data_tweetnlp = data_tweetnlp.drop(columns=["created_at"])
data_tweetnlp = data_tweetnlp.groupby(['tweetnlp_sentiment_label', 'topic', pandas.Grouper(freq='M')]).count()
data_tweetnlp = data_tweetnlp.reset_index()
data_tweetnlp

Dask Apply:   0%|          | 0/48 [00:00<?, ?it/s]

Unnamed: 0,tweetnlp_sentiment_label,topic,created_at,user_id,status_id,screen_name,text,source,display_text_width,reply_to_status_id,...,tweetnlp_sentiment_negative,tweetnlp_sentiment_neutral,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8
0,negative,1.0,2007-07-31,1,1,1,1,1,0,1,...,1,1,1,1,1,1,1,1,1,1
1,negative,1.0,2007-11-30,1,1,1,1,1,0,0,...,1,1,1,1,1,1,1,1,1,1
2,negative,1.0,2008-01-31,1,1,1,1,1,0,1,...,1,1,1,1,1,1,1,1,1,1
3,negative,1.0,2008-02-29,2,2,2,2,2,0,1,...,2,2,2,2,2,2,2,2,2,2
4,negative,1.0,2008-03-31,2,2,2,2,2,0,0,...,2,2,2,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3044,positive,8.0,2020-01-31,9,9,9,9,9,4,1,...,9,9,9,9,9,9,9,9,9,9
3045,positive,8.0,2020-02-29,7,7,7,7,7,3,2,...,7,7,7,7,7,7,7,7,7,7
3046,positive,8.0,2020-03-31,26,26,26,26,26,9,1,...,26,26,26,26,26,26,26,26,26,26
3047,positive,8.0,2020-04-30,10,10,10,10,10,1,1,...,10,10,10,10,10,10,10,10,10,10


In [10]:
data_tweetnlp[(data_tweetnlp.tweetnlp_sentiment_label == 'positive') & (data_tweetnlp.topic == 1)]

Unnamed: 0,tweetnlp_sentiment_label,topic,created_at,user_id,status_id,screen_name,text,source,display_text_width,reply_to_status_id,...,tweetnlp_sentiment_negative,tweetnlp_sentiment_neutral,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8
2009,positive,1.0,2007-06-30,1,1,1,1,1,0,0,...,1,1,1,1,1,1,1,1,1,1
2010,positive,1.0,2007-07-31,1,1,1,1,1,0,0,...,1,1,1,1,1,1,1,1,1,1
2011,positive,1.0,2007-11-30,10,10,10,10,10,0,0,...,10,10,10,10,10,10,10,10,10,10
2012,positive,1.0,2007-12-31,5,5,5,5,5,0,1,...,5,5,5,5,5,5,5,5,5,5
2013,positive,1.0,2008-01-31,6,6,6,6,6,0,1,...,6,6,6,6,6,6,6,6,6,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2156,positive,1.0,2020-01-31,1191,1191,1191,1191,1191,312,177,...,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191
2157,positive,1.0,2020-02-29,453,453,453,453,453,202,125,...,453,453,453,453,453,453,453,453,453,453
2158,positive,1.0,2020-03-31,532,532,532,532,532,208,106,...,532,532,532,532,532,532,532,532,532,532
2159,positive,1.0,2020-04-30,381,381,381,381,381,169,115,...,381,381,381,381,381,381,381,381,381,381


In [26]:
import plotly.graph_objects as go
fig = go.Figure()
for i in range(1,9):
    fig.add_trace(
        go.Scatter(x=data_tweetnlp[(data_tweetnlp.tweetnlp_sentiment_label == 'positive') & (data_tweetnlp.topic == i)].created_at, y=data_tweetnlp[(data_tweetnlp.tweetnlp_sentiment_label == 'positive') & (data_tweetnlp.topic == i)].status_id, name=f'Topic {i}')
    )
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
), template='plotly_white', title='<b>Number of positive tweets per month</b>')
fig.update_yaxes(title='<b>Number of tweets</b>')
write_html(fig, 'en-sentiment-topic-positive')
fig

In [27]:
import plotly.graph_objects as go
fig = go.Figure()
for i in range(1,9):
    fig.add_trace(
        go.Scatter(x=data_tweetnlp[(data_tweetnlp.tweetnlp_sentiment_label == 'negative') & (data_tweetnlp.topic == i)].created_at, y=data_tweetnlp[(data_tweetnlp.tweetnlp_sentiment_label == 'negative') & (data_tweetnlp.topic == i)].status_id, name=f'Topic {i}')
    )
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
), template='plotly_white', title='<b>Number of negative tweets per month</b>')
fig.update_yaxes(title='<b>Number of tweets</b>')
write_html(fig, 'en-sentiment-topic-negative')
fig

In [9]:
import pyarrow.feather as pf
import arrow
import pandas
import swifter
data_tweetnlp = pf.read_feather('data/processed/en_sent_label_tweetnlp_topic.feather')
data_tweetnlp['tweetnlp_sentiment_score'] = 0 + data_tweetnlp['tweetnlp_sentiment_positive'] - data_tweetnlp['tweetnlp_sentiment_negative']
data_tweetnlp.created_at = data_tweetnlp.created_at.swifter.apply(
            lambda x: arrow.get(x).datetime.replace(tzinfo=None)
        )
data_tweetnlp.index = data_tweetnlp.created_at
data_tweetnlp = data_tweetnlp.drop(columns=["created_at"])
data_tweetnlp = data_tweetnlp.groupby(['topic', pandas.Grouper(freq='M')]).mean()
data_tweetnlp = data_tweetnlp.reset_index()
data_tweetnlp

Dask Apply:   0%|          | 0/48 [00:00<?, ?it/s]

Unnamed: 0,topic,created_at,user_id,status_id,display_text_width,reply_to_status_id,reply_to_user_id,is_quote,is_retweet,favorite_count,...,tweetnlp_sentiment_neutral,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,tweetnlp_sentiment_score
0,1.0,2007-05-31,3.379697e+06,7.519302e+07,,,,0.00000,0.000000,0.000000,...,0.722586,0.236072,0.118224,0.076845,0.078521,0.140595,0.146413,0.097288,0.106042,0.243245
1,1.0,2007-06-30,1.013981e+06,9.064882e+07,,,,0.00000,0.000000,0.000000,...,0.285077,0.187254,0.181082,0.053295,0.121652,0.108785,0.136133,0.099108,0.112690,0.690127
2,1.0,2007-07-31,8.041925e+05,1.437403e+08,,1.437494e+08,8.195220e+05,0.00000,0.000000,0.000000,...,0.290286,0.217794,0.176254,0.087059,0.135919,0.095955,0.167254,0.048688,0.071076,-0.212814
3,1.0,2007-09-30,6.219462e+06,2.676662e+08,,,,0.00000,0.000000,0.000000,...,0.803949,0.233795,0.141727,0.100386,0.085906,0.100409,0.181461,0.073278,0.083039,0.051200
4,1.0,2007-10-31,8.454820e+05,3.426918e+08,,,,0.00000,0.000000,0.333333,...,0.815241,0.262913,0.138521,0.075600,0.119473,0.074650,0.159787,0.076852,0.092204,0.059421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1148,8.0,2020-01-31,4.157469e+17,1.217532e+18,107.800000,1.217844e+18,3.344083e+17,0.04878,0.365854,1.170732,...,0.518699,0.163721,0.126756,0.102128,0.090876,0.070040,0.164921,0.051296,0.230262,0.030306
1149,8.0,2020-02-29,3.498468e+17,1.228155e+18,86.142857,1.225475e+18,4.142219e+17,0.00000,0.153846,1.230769,...,0.482717,0.173516,0.129916,0.113092,0.113048,0.055845,0.139188,0.044470,0.230924,0.053772
1150,8.0,2020-03-31,3.406291e+17,1.238411e+18,118.666667,1.237357e+18,3.313137e+17,0.00000,0.657895,2.789474,...,0.521835,0.158069,0.112559,0.104142,0.082837,0.058972,0.189176,0.052480,0.241765,0.209105
1151,8.0,2020-04-30,4.723100e+17,1.250527e+18,106.750000,1.251185e+18,3.149293e+17,0.00000,0.571429,0.380952,...,0.525942,0.143716,0.122751,0.141895,0.105220,0.066013,0.147298,0.041107,0.232001,0.270356


In [15]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=8, cols=1)
for i in range(1,9):
    fig.add_trace(
        go.Bar(x=data_tweetnlp[data_tweetnlp.topic==i].created_at, y=data_tweetnlp[data_tweetnlp.topic==i].tweetnlp_sentiment_score, name=f'<b>Topic {i}</b>'),
        row=i, col=1
    )
fig.update_layout(template='plotly_white', title='<b>Sentiment score of English tweets by topic</b><br>-1: most negative, 1: most positive')
write_html(fig, 'en-sentiment-score-topic')
fig

In [16]:
import pyarrow.feather as pf
import arrow
import pandas
import swifter
data_tweetnlp = pf.read_feather('data/processed/en_sent_label_tweetnlp_topic.feather')
data_tweetnlp.created_at = data_tweetnlp.created_at.swifter.apply(
            lambda x: arrow.get(x).datetime.replace(tzinfo=None)
        )
data_tweetnlp.index = data_tweetnlp.created_at
data_tweetnlp = data_tweetnlp.drop(columns=["created_at"])
data_tweetnlp = data_tweetnlp.groupby(['topic', pandas.Grouper(freq='M')]).count()
data_tweetnlp = data_tweetnlp.reset_index()
data_tweetnlp

Dask Apply:   0%|          | 0/48 [00:00<?, ?it/s]

Unnamed: 0,topic,created_at,user_id,status_id,screen_name,text,source,display_text_width,reply_to_status_id,reply_to_user_id,...,tweetnlp_sentiment_negative,tweetnlp_sentiment_neutral,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8
0,1.0,2007-05-31,29,29,29,29,29,0,0,0,...,29,29,29,29,29,29,29,29,29,29
1,1.0,2007-06-30,1,1,1,1,1,0,0,0,...,1,1,1,1,1,1,1,1,1,1
2,1.0,2007-07-31,2,2,2,2,2,0,1,1,...,2,2,2,2,2,2,2,2,2,2
3,1.0,2007-09-30,4,4,4,4,4,0,0,0,...,4,4,4,4,4,4,4,4,4,4
4,1.0,2007-10-31,3,3,3,3,3,0,0,0,...,3,3,3,3,3,3,3,3,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1148,8.0,2020-01-31,41,41,41,41,41,20,12,16,...,41,41,41,41,41,41,41,41,41,41
1149,8.0,2020-02-29,26,26,26,26,26,14,13,13,...,26,26,26,26,26,26,26,26,26,26
1150,8.0,2020-03-31,76,76,76,76,76,15,7,8,...,76,76,76,76,76,76,76,76,76,76
1151,8.0,2020-04-30,42,42,42,42,42,12,10,13,...,42,42,42,42,42,42,42,42,42,42


In [18]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=8, cols=1)
for i in range(1,9):
    fig.add_trace(
        go.Bar(x=data_tweetnlp[data_tweetnlp.topic==i].created_at, y=data_tweetnlp[data_tweetnlp.topic==i].status_id, name=f'<b>Topic {i}</b>'),
        row=i, col=1
    )
fig.update_layout(template='plotly_white', title='<b>Number of English tweets per month by topic</b>')
write_html(fig, 'en-topic-per-month')
fig

In [22]:
import spacy
import pandas
import nltk
from nltk.stem import WordNetLemmatizer
from lib.dtc import tokenizer

from typing import Tuple
from collections import Counter
import itertools
spacy_nlp = spacy.load('en_core_web_lg')

nltk.download('omw-1.4')
import pyarrow.parquet as pq
data = pq.read_table('data/raw/en_sent_with_topic8.parquet').to_pandas()
lemmatizer = WordNetLemmatizer()
tweets = data.text.tolist()
words = []
for tweet in tweets:
    tweet = tweet.replace("#", "")
    # words.append([w for w in tokenizer(tweet, spacy_nlp.Defaults.stop_words) if w not in ['23andme', 'ancestrydna']])
    words.append(tokenizer(tweet, spacy_nlp.Defaults.stop_words))

def combine_two(word_list):
    word_list = [w for w in word_list if w and w not in ['23andme', 'ancestrydna']]
    return [' '.join(word_list[i-1:i+1]) for i in range(1, len(word_list))]

grouped_words = [combine_two(w) for w in words]
grouped_words = list(itertools.chain(*grouped_words))
group_freq = Counter(grouped_words)
top25_grouped = [{"term": g[0].__str__(), "count": g[1]} for g in group_freq.most_common(25)]
df_top25_grouped = pandas.DataFrame(top25_grouped).sort_values(by='count', ascending=False)
filtered_words = [word for word in [lemmatizer.lemmatize(word) for word in list(itertools.chain(*words))] if word not in ['23andme', 'ancestrydna']]

word_freq = Counter(filtered_words)
top25_words = [{"term": w[0], "count": w[1]} for w in word_freq.most_common(25)]
df_top25_words = pandas.DataFrame(top25_words).sort_values(by='count', ascending=False)

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Bo\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [23]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig_top25 = make_subplots(rows=1, cols=2, horizontal_spacing=0.15)
fig_top25.add_trace(
    go.Bar(x=df_top25_words['count'], y=df_top25_words['term'], name='<b>Top 25 most frequent word combinations</b>', orientation='h'), row=1, col=1
)
fig_top25.add_trace(
    go.Bar(x=df_top25_grouped['count'], y=df_top25_grouped['term'], name='<b>Top 25 most frequent words</b>', orientation='h'), row=1, col=2
)
fig_top25.update_yaxes(autorange='reversed', tickfont_family="Arial Black")
fig_top25.update_layout(template='seaborn', legend=dict(xanchor='right', yanchor='bottom'))
fig_top25.show()
fig_top25.write_html('output/Fig4_top25terms.html')

## 4. Visualising categorised 'I am' tweets

In [7]:
import pandas
df_iam = pandas.read_excel('data/processed/en_iam.xlsx')
df_iam.category1 = df_iam.category1.str.strip()
df_iam.category2 = df_iam.category2.str.strip()
df_iam['sub-category1'] = df_iam['sub-category1'].str.strip()
df_iam['sub-category2'] = df_iam['sub-category2'].str.strip()
df_iam[['category1', 'category2']] = df_iam[['category1', 'category2']].fillna(value='')

In [8]:
df_iam.groupby(['category1', 'category2'], dropna=False).count()['status_id'].reset_index()

Unnamed: 0,category1,category2,status_id
0,,,1
1,adoption,,1
2,adoption,health,1
3,ancestry,,401
4,ancestry,adoption,7
...,...,...,...
63,taking a test,identity,4
64,taking a test,mismatch,1
65,taking a test,opinion_23&me,2
66,taking a test,questions,1


In [9]:
axis_data = list(set(df_iam.category1.unique().tolist() + df_iam.category2.unique().tolist()))
iam_data = df_iam.groupby(['category1', 'category2']).count()['status_id'].reset_index().values.tolist()
iam_data

[['', '', 1],
 ['adoption', '', 1],
 ['adoption', 'health', 1],
 ['ancestry', '', 401],
 ['ancestry', 'adoption', 7],
 ['ancestry', 'concerns', 5],
 ['ancestry', 'curiosity', 1],
 ['ancestry', 'disbelief', 1],
 ['ancestry', 'excitement', 5],
 ['ancestry', 'health', 1],
 ['ancestry', 'humour', 7],
 ['ancestry', 'identity', 4],
 ['ancestry', 'lifestyle', 1],
 ['ancestry', 'mismatch', 13],
 ['ancestry', 'opinion_23&me', 1],
 ['ancestry', 'recommentadion', 1],
 ['ancestry', 'surprise', 4],
 ['ancestry', 'taking a test', 4],
 ['concerns', '', 62],
 ['concerns', 'ancestry', 3],
 ['concerns', 'excitement', 4],
 ['concerns', 'health', 1],
 ['concerns', 'humour', 1],
 ['concerns', 'investors', 1],
 ['concerns', 'skepticism', 1],
 ['concerns', 'taking a test', 1],
 ['curiosity', '', 1],
 ['dna relatives', '', 1],
 ['excitement', '', 56],
 ['excitement', 'ancestry', 4],
 ['excitement', 'hypochondria', 1],
 ['gratefulness', '', 2],
 ['health', '', 57],
 ['health', 'concerns', 2],
 ['health', 'heal

In [10]:
import pyecharts.options as opts
from pyecharts.charts import HeatMap

c = HeatMap().add_xaxis(axis_data).add_yaxis(series_name='category', yaxis_data=axis_data, value=iam_data).set_series_opts().set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),

        xaxis_opts=opts.AxisOpts(
            type_="category", axislabel_opts=opts.LabelOpts(interval=0, rotate=30)
        ),
        yaxis_opts=opts.AxisOpts(
            type_="category", axislabel_opts=opts.LabelOpts(interval=0)
        ),
        visualmap_opts=opts.VisualMapOpts(
            is_show=False
        ),
    )
c.load_javascript()

<pyecharts.render.display.Javascript at 0x27e2f8558e0>

In [11]:
c.render_notebook()

In [15]:
from pyecharts import options as opts
from pyecharts.charts import Sankey
from pprint import pprint

nodes = set()
links = []
for _, row in df_iam.iterrows():
    if not pandas.isna(row['category1']):
        nodes.add(row['category1'])
    if not pandas.isna(row['category2']):
        nodes.add(row['category2'])
for _, row in df_iam.groupby(['category1', 'category2']).count()['status_id'].reset_index().iterrows():
     if not pandas.isna(row['category1']):
         if pandas.isna(row['category2']):
             nodes.add('other')
             links.append({'source': row['category1'], 'target': 'other', 'value': row['status_id']})
         else:
             if any(l for l in links if l['source'] == row['category2'] and l['target'] == row['category1']):
                 links.append({'source': row['category2'], 'target': row['category1'], 'value': row['status_id']})
             else:
                 links.append({'source': row['category1'], 'target': row['category2'], 'value': row['status_id']})
nodes = [{'name': n} for n in nodes]
pprint(nodes)
pprint(links)
c = Sankey().add('Tweet category', nodes, links).set_global_opts(title_opts=opts.TitleOpts(title="'I am' tweets categories"))
c.load_javascript()

[{'name': 'hypochondria'},
 {'name': 'unclassified'},
 {'name': 'concerns'},
 {'name': 'opinion_23&me'},
 {'name': 'disbelief'},
 {'name': 'curiosity'},
 {'name': 'investors'},
 {'name': 'ancestry'},
 {'name': 'health'},
 {'name': 'promo deals'},
 {'name': 'identity'},
 {'name': 'shock'},
 {'name': 'skepticism'},
 {'name': 'mismatch'},
 {'name': 'questions'},
 {'name': 'humour'},
 {'name': 'science curious'},
 {'name': 'excitement'},
 {'name': 'surprise'},
 {'name': 'lifestyle'},
 {'name': 'dna relatives'},
 {'name': 'itentity'},
 {'name': 'adoption'},
 {'name': 'gratefulness'},
 {'name': 'recommentadion'},
 {'name': 'taking a test'}]
[{'source': 'adoption', 'target': 'health', 'value': 1},
 {'source': 'ancestry', 'target': 'adoption', 'value': 7},
 {'source': 'ancestry', 'target': 'concerns', 'value': 5},
 {'source': 'ancestry', 'target': 'curiosity', 'value': 1},
 {'source': 'ancestry', 'target': 'disbelief', 'value': 1},
 {'source': 'ancestry', 'target': 'excitement', 'value': 5},
 

<pyecharts.render.display.Javascript at 0x15bbd686550>

In [None]:
c.render_notebook()

In [12]:
from snapshot_selenium import snapshot
from pyecharts.render import make_snapshot

make_snapshot(snapshot, c.render(), "output/images/i_am.png")

In [14]:
nodes = [
    {"name": "category1"},
    {"name": "category2"},
    {"name": "category3"},
    {"name": "category4"},
    {"name": "category5"},
    {"name": "category6"},
]

links = [
    {"source": "category1", "target": "category2", "value": 10},
    {"source": "category2", "target": "category3", "value": 15},
    {"source": "category3", "target": "category4", "value": 20},
    {"source": "category5", "target": "category6", "value": 25},
    {"source": "category6", "target": "category4", "value": 5},
    {"source": "category4", "target": "category2", "value": 5},
]

sankey=Sankey()
sankey.add(
        "sankey",
        nodes = nodes,
        links = links,
        pos_top='10%',
        node_width=30,
        node_gap=100,
        node_align='left',
    )
sankey.render_notebook()

In [18]:
import random
[[i, j, random.randint(0, 50)] for i in range(24) for j in range(7)]

[[0, 0, 15],
 [0, 1, 8],
 [0, 2, 33],
 [0, 3, 50],
 [0, 4, 46],
 [0, 5, 0],
 [0, 6, 47],
 [1, 0, 33],
 [1, 1, 49],
 [1, 2, 13],
 [1, 3, 4],
 [1, 4, 2],
 [1, 5, 25],
 [1, 6, 4],
 [2, 0, 49],
 [2, 1, 22],
 [2, 2, 5],
 [2, 3, 8],
 [2, 4, 7],
 [2, 5, 18],
 [2, 6, 2],
 [3, 0, 3],
 [3, 1, 45],
 [3, 2, 50],
 [3, 3, 43],
 [3, 4, 31],
 [3, 5, 38],
 [3, 6, 44],
 [4, 0, 20],
 [4, 1, 3],
 [4, 2, 44],
 [4, 3, 14],
 [4, 4, 27],
 [4, 5, 6],
 [4, 6, 50],
 [5, 0, 36],
 [5, 1, 30],
 [5, 2, 37],
 [5, 3, 27],
 [5, 4, 2],
 [5, 5, 17],
 [5, 6, 42],
 [6, 0, 14],
 [6, 1, 15],
 [6, 2, 43],
 [6, 3, 16],
 [6, 4, 19],
 [6, 5, 20],
 [6, 6, 22],
 [7, 0, 3],
 [7, 1, 32],
 [7, 2, 0],
 [7, 3, 24],
 [7, 4, 38],
 [7, 5, 41],
 [7, 6, 43],
 [8, 0, 4],
 [8, 1, 2],
 [8, 2, 37],
 [8, 3, 14],
 [8, 4, 18],
 [8, 5, 24],
 [8, 6, 9],
 [9, 0, 40],
 [9, 1, 14],
 [9, 2, 30],
 [9, 3, 39],
 [9, 4, 5],
 [9, 5, 5],
 [9, 6, 6],
 [10, 0, 27],
 [10, 1, 28],
 [10, 2, 44],
 [10, 3, 29],
 [10, 4, 32],
 [10, 5, 8],
 [10, 6, 23],
 [11, 0, 17],
