In [1]:
# @title Setup
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table
import networkx as nx
import pandas as pd

project = 'asymmetric-cove-187011' # Project ID inserted based on the query results selected to explore
# project = 'glossy-odyssey-366820'
location = 'US' # Location inserted based on the query results selected to explore
client = bigquery.Client(project=project, location=location)
data_table.enable_dataframe_formatter()
auth.authenticate_user()

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Links Data


In [3]:
links = client.get_job('bquxjob_247e4004_18f7744460a')
links_df = links.to_dataframe()

In [4]:
print(links.query)

SELECT * from glossy-odyssey-366820.farcaster.links


In [5]:
links_df.type.value_counts()

type
follow      82545116
unfollow          28
partof             6
1                  5
buddy              1
remove             1
add                1
Name: count, dtype: int64

In [6]:
links_df.head()

Unnamed: 0,fid,timestamp,target_fid,type,deleted_at
0,512238,2024-05-07 15:23:02+00:00,504790,follow,NaT
1,429952,2024-04-02 19:01:18+00:00,15576,follow,NaT
2,403981,2024-04-03 21:42:11+00:00,284633,follow,NaT
3,357407,2024-03-18 20:02:44+00:00,351708,follow,NaT
4,386214,2024-04-02 12:06:24+00:00,302556,follow,NaT


In [None]:

earliest_date = links_df.timestamp.dt.date.min

# Profile Data

In [None]:
profiles = client.get_job('bquxjob_3a4ba4b5_18f7746284b')
profiles_df = profiles.to_dataframe()

In [None]:
print(profiles.query)

SELECT * from glossy-odyssey-366820.farcaster.profiles;


In [None]:
profiles_df.head()

Unnamed: 0,fid,last_updated_at,data,custody_address
0,504692,2024-04-28 08:29:33+00:00,,0x77b0352100ebdb86a45557296e05721e48cc6d01
1,511997,2024-05-02 16:45:35+00:00,,0xed414b0b2d8ba9208c26fd264177f0afd7ab1c7f
2,530839,2024-05-11 10:39:33+00:00,,0xef7c5ee8eb10cd4ae60611506532e1a6a279c070
3,529570,2024-05-10 18:29:43+00:00,,0xf5efc5ae51cfd375bb82a7bd32e07998a232ff2d
4,521201,2024-05-08 11:05:15+00:00,,0x9ec342d937a5e723084c8801b6d0ad27fbfa5d68


In [None]:
profiles_df.data.value_counts()

data
{}                                                                                                                                                                                                                                    179049
{"display":"test2231","pfp":"https://i.imgur.com/URVj5Ct.jpg"}                                                                                                                                                                            13
{"bio":""}                                                                                                                                                                                                                                11
{"display":"MARK BEN FORD","pfp":"https://far.quest/DEFAULT_AVATAR.jpg"}                                                                                                                                                                   2
{"display":"Fordrellador","pfp":"https://far.qu

# Reactions

In [None]:
reactions_df = pd.read_parquet('/content/drive/My Drive/reactions_df.parquet')

In [None]:
# reactions = client.get_job('bquxjob_54a9ef94_18f7747fdf3')
# reactions_df = reactions.to_dataframe()

In [None]:
reactions_df.head()

Unnamed: 0,fid,timestamp,target_cast_fid,target_cast_hash,type,deleted_at
0,407625,2024-04-09 16:12:38+00:00,407533,0x069e08b4b4d033b286fe83681fbe862ab86d44de,recast,NaT
1,399370,2024-04-09 13:57:02+00:00,430467,0xed4b387afb6008eac3451cb97caa4ff47731db60,recast,NaT
2,435857,2024-04-09 13:59:51+00:00,280293,0x0e04688f79b53fffa532e7bbb8d7ccd822169385,recast,NaT
3,459677,2024-04-09 18:21:36+00:00,386755,0x777fc9351b19c0f3c33bfbfcac656ad093aa24c0,recast,NaT
4,405980,2024-04-03 03:29:55+00:00,300589,0x600487fc3af09dd73030409105fc2aed9c94a78b,recast,NaT


In [None]:
reactions1k_df = reactions_df[0:100000]

In [None]:
reactions_df.to_parquet('reactions.parquet')

In [None]:
# chunk_size = 1000000  # Adjust based on your data and available memory
# for i in range(0, len(reactions_df), chunk_size):
#     reactions_df.iloc[i:i+chunk_size].to_parquet(f'reactions_chunks/reactions_chunk_{i}.parquet')

In [None]:
reactions_df.to_parquet('/content/drive/My Drive/reactions_df.parquet')

In [None]:
pd.pivot_table(reactions1k_df, values = 'type', index='target_cast_fid', aggfunc='sum')

Unnamed: 0_level_0,type
target_cast_fid,Unnamed: 1_level_1
3,likelikelike
8,recast
99,likelike
108,like
239,like
...,...
518973,like
519238,like
524760,like
525598,like


In [None]:
reactions_fid_df = pd.pivot_table(reactions_df, values = 'type', index='target_cast_fid', aggfunc='sum')

# Convert tabular data to graph data

In [None]:
G = nx.from_pandas_edgelist(
    links_df,
    source='fid',
    target='target_fid',
    create_using=nx.DiGraph()
)

# Run Pagerank on the fids




In [None]:
pr = nx.pagerank(G)

# Process results to find top 10 fids by pagerank

In [None]:
df_pr = pd.DataFrame.from_dict(pr,  orient='index', columns = ['pagerank']).reset_index(names=['fid'])

In [None]:
df_pr



Unnamed: 0,fid,pagerank
0,442004,5.461374e-06
1,431575,3.328134e-06
2,218553,1.267739e-06
3,472,1.929679e-03
4,410576,4.576353e-07
...,...,...
403922,177001,4.573168e-07
403923,184945,4.573168e-07
403924,361244,4.538852e-07
403925,168424,4.573168e-07


In [None]:
pr_sorted_df = df_pr.sort_values(by='pagerank', ascending=False)

In [None]:
pr_sorted_profiles_df = pd.merge(profiles_df, pr_sorted_df, on = 'fid').sort_values('pagerank', ascending=False)

In [None]:
pr_sorted_profiles_df.head()

Unnamed: 0,fid,last_updated_at,data,custody_address,pagerank
378175,3,2023-08-31 16:52:50+00:00,"{""bio"":""Working on Farcaster and Warpcast."",""d...",,0.00612
389665,5650,2024-01-19 01:20:58+00:00,"{""bio"":""hullo"",""display"":""Vitalik Buterin"",""pf...",,0.005528
260793,99,2024-04-16 01:55:58+00:00,"{""bio"":""@base contributor #001; onchain cities...",,0.004781
368449,2,2024-05-06 21:10:43+00:00,"{""bio"":""Technowatermelon. Elder Millenial. Bui...",,0.004591
257875,207,2024-04-12 20:29:26+00:00,"{""bio"":""(summer of) protocol {support | guild ...",,0.004421


In [None]:
pr_sorted_profiles_df[['pagerank', 'fid', 'data']][0:100]

Unnamed: 0,pagerank,fid,data
378175,0.006120,3,"{""bio"":""Working on Farcaster and Warpcast."",""d..."
389665,0.005528,5650,"{""bio"":""hullo"",""display"":""Vitalik Buterin"",""pf..."
260793,0.004781,99,"{""bio"":""@base contributor #001; onchain cities..."
368449,0.004591,2,"{""bio"":""Technowatermelon. Elder Millenial. Bui..."
257875,0.004421,207,"{""bio"":""(summer of) protocol {support | guild ..."
...,...,...,...
378177,0.001163,302,"{""bio"":""Working on Farcaster."",""display"":""Goks..."
369142,0.001152,4407,"{""bio"":""The Farcaster formerly known as Kchamp..."
401725,0.001129,9816,"{""bio"":""🇫🇷 Generative artist • he/him • \nNFTs..."
397752,0.001126,1689,"{""bio"":""wannabe cypherpunk, engineer working o..."


In [None]:
pr_sorted_profiles_df[['pagerank', 'fid', 'data']].sample(n=100)

Unnamed: 0,pagerank,fid,data
397085,1.062947e-05,5606,"{""bio"":""time traveler"",""display"":""ceo"",""pfp"":""..."
351722,5.374585e-07,283833,"{""bio"":""Nice"",""display"":""Trang 🔵 🎩"",""pfp"":""htt..."
51372,1.107243e-06,461564,"{""bio"":""A guy with a keen interest in art. “Ne..."
392729,1.310320e-06,255065,"{""bio"":""mari kita ngopi bang"",""display"":""aang ..."
97801,4.667486e-07,533383,"{""bio"":""Human enough"",""display"":""Abisoye Oluwa..."
...,...,...,...
291638,6.484561e-07,193329,"{""bio"":""Do today for the best"",""display"":""Kitp..."
144565,7.220078e-07,437910,"{""bio"":""Soon TVA $1M 🐳"",""display"":""M. Aslam Ar..."
277759,3.320972e-06,309053,"{""bio"":""College Teacher & COINS HOLDER 🤑"",""dis..."
146077,4.799988e-07,384810,"{""bio"":""Stuff"",""display"":""James"",""pfp"":""https:..."


In [None]:
pr_sorted_profiles_df[['pagerank', 'fid', 'data']][0:100]

In [None]:
pr_sorted_profiles_df[['pagerank', 'fid', 'data']]



Unnamed: 0,pagerank,fid,data
353805,6.255433e-03,3,"{""bio"":""Working on Farcaster and Warpcast."",""d..."
365275,5.715855e-03,5650,"{""bio"":""hullo"",""display"":""Vitalik Buterin"",""pf..."
236389,4.879273e-03,99,"{""bio"":""@base contributor #001; onchain cities..."
344070,4.694713e-03,2,"{""bio"":""Technowatermelon. Elder Millenial. Bui..."
233479,4.496411e-03,207,"{""bio"":""(summer of) protocol {support | guild ..."
...,...,...,...
18733,4.879229e-07,362956,"{""bio"":"""",""display"":""Maya Wulandari"",""pfp"":""ht..."
18740,4.879229e-07,361294,"{""bio"":"""",""display"":""Syahrul "",""pfp"":""https://..."
18743,4.879229e-07,362836,"{""bio"":"""",""display"":""M.debrian Zamar"",""pfp"":""h..."
228350,4.879229e-07,362526,"{""bio"":"""",""display"":""Siti Aulia"",""pfp"":""https:..."


