In [1]:
# !pip install polars
import polars as pl
import os
pl.Config.set_fmt_str_lengths(400)

polars.config.Config

In [2]:
DATA_PATH = os.getenv("DATA_PATH", "")

In [5]:
fids_df = pl.read_parquet(f"{DATA_PATH}/raw/farcaster-fids-0-1730134800.parquet")

In [19]:
# profiles_df = pl.read_parquet(f"{DATA_PATH}/raw/farcaster-fids-0-1730134800.parquet")farcaster-fids-0-1727974800

In [6]:
links_df = pl.read_parquet(f"{DATA_PATH}/raw/farcaster-links-0-1730134800.parquet")

In [7]:
reactions_lazy_df = pl.scan_parquet(f"{DATA_PATH}/raw/farcaster-reactions-0-1730134800.parquet")

In [8]:
verifications_df = pl.read_parquet(f"{DATA_PATH}/raw/farcaster-verifications-0-1730134800.parquet")

In [9]:
# Verifications count per fid
verifications_count_df = (
    verifications_df
    .group_by("fid")
    .agg(pl.count("id").alias("verifications_count"))
)

In [10]:
# Likes count and recasts count per fid
reactions_count_lazy = (
    reactions_lazy_df
    .filter(pl.col("deleted_at").is_null())  # Filter out deleted reactions
    .group_by(["fid", "reaction_type"])
    .agg(pl.count("id").alias("count"))
)

In [11]:
reactions_count_df = reactions_count_lazy.collect()

In [16]:
reactions_pivot_df = (
    reactions_count_df
    .pivot(
        values="count",
        index="fid",
        columns="reaction_type",
        aggregate_function="first"
    )
    .rename({"1": "likes_count", "2": "recasts_count"})
    .fill_null(0)
)

In [12]:
following_count_df = (
    links_df
    .filter((pl.col("type") == "follow") & (pl.col("deleted_at").is_null()))
    .group_by("fid")
    .agg(pl.count("id").alias("following_count"))
)

In [13]:
# Followers count (number of users following the fid)
followers_count_df = (
    links_df
    .filter((pl.col("type") == "follow") & (pl.col("deleted_at").is_null()))
    .group_by("target_fid")
    .agg(pl.count("id").alias("followers_count"))
    .rename({"target_fid": "fid"})
)

In [17]:
result_df = fids_df.join(verifications_count_df, on="fid", how="left")
result_df = result_df.join(following_count_df, on="fid", how="left")
result_df = result_df.join(followers_count_df, on="fid", how="left")
result_df = result_df.join(reactions_pivot_df, on="fid", how="left")
result_df = result_df.fill_null(0)

  result_df = fids_df.join(verifications_count_df, on="fid", how="left")
  result_df = result_df.join(following_count_df, on="fid", how="left")
  result_df = result_df.join(followers_count_df, on="fid", how="left")
  result_df = result_df.join(reactions_pivot_df, on="fid", how="left")


In [18]:
result_df.write_parquet(f"{DATA_PATH}/interim/aggregated_links_statistics.parquet")

In [19]:
import psutil
import os

# Function to print the current memory usage
def print_memory_usage():
    process = psutil.Process(os.getpid())
    memory = process.memory_info().rss / (1024 * 1024)  # Convert to MB
    print(f"Memory Usage: {memory:.2f} MB")

In [20]:
print_memory_usage()

Memory Usage: 3014.19 MB


In [21]:
result_df.sample(10)

created_at,updated_at,custody_address,registered_at,fid,verifications_count,following_count,followers_count,likes_count,recasts_count
datetime[ns],datetime[ns],binary,"datetime[ns, UTC]",i64,i64,i64,i64,i64,i64
2023-10-13 15:46:47.027210,2024-06-07 18:06:03.493,"b""\x1b&\xba\xe7\xce\x93\xe7\xe4\xed\x0d\xae\xc8\x0avK\xc5\xbd\x9eg\x0f""",2023-11-07 20:55:21 UTC,75610,0,0,0,0,0
2023-10-13 18:39:08.710805,2024-06-07 18:16:20.200,"b""\xfc2<2\x13z\xde/\x80z\x99\x155(K\xd3\xaf\xbf\x92\xdd""",2023-11-07 21:53:27 UTC,180076,0,0,0,0,0
2024-02-06 18:19:19.577041,2024-06-07 23:14:59.387,"b""\xc6\xda\xe4\xc8\x86\x18\xa7f\xdcy\x8fq\xab\xd2\xb2A\xd9<s@""",2024-02-06 18:19:05 UTC,309891,1,77,9,8,8
2024-02-18 07:23:23.553758,2024-06-08 01:23:31.512,"b""\xee\xfa\xb1\xb4M\xcfO\x9a\x00\x98.\x923s+$d\xbf\xbd}""",2024-02-18 07:23:15 UTC,356855,0,1,1,1,0
2024-08-06 11:47:53.282542,2024-08-06 11:47:53.282542,"b""Rc\x92p/\x17\x1cT]\x93\xbeP\xf0\xdd\x98\xffr\x0a\x19c""",2024-08-06 11:47:47 UTC,819970,1,23,21,0,0
2024-07-04 20:02:27.386729,2024-07-04 20:02:27.386729,"b""\x8al\x1b\x9c\xf9\xc6\xf6\xdc.=(\xc8\x8c=\xf0\x7f\xf1\xea\xc6\xfe""",2024-07-04 20:02:21 UTC,762979,1,88,8,27,0
2024-06-11 14:11:41.386700,2024-06-11 14:11:41.386700,"b""\xc7.\xb5\xff\xa98\x16\xb2\xfb(\xecu\xa7\x8dH>0ue\xef""",2024-06-11 14:11:31 UTC,659187,0,101,13,0,0
2024-06-16 10:17:57.440533,2024-06-16 10:28:23.771,"b""E\x84h;i\xb8\xc1\xedl\x01A\xb6\x1f}\xf3\xae\xca\x1eX\x07""",2024-06-16 10:28:15 UTC,685802,0,0,3,0,0
2024-05-25 18:40:47.404723,2024-06-08 17:31:13.159,"b""P\x99\xb1\x9d\xd5Ci\xbak\xf5\xf9\x0doH\xbfVr\xcb\x102""",2024-05-25 18:40:37 UTC,575890,0,74,13,0,0
2024-05-18 14:17:11.230045,2024-06-08 17:04:32.430,"b""\xbf\xb9\x80^\x0c~e0""\x90\xb0.\xfb\xd9\xae\xebo-)\xd7""",2024-05-18 14:17:05 UTC,543802,1,320,24,0,0


In [22]:
joao_fid = 12031

In [23]:
user_statistics = result_df.filter(pl.col("fid") == joao_fid)


In [24]:
user_statistics

created_at,updated_at,custody_address,registered_at,fid,verifications_count,following_count,followers_count,likes_count,recasts_count
datetime[ns],datetime[ns],binary,"datetime[ns, UTC]",i64,i64,i64,i64,i64,i64
2023-09-05 23:18:56.166954,2024-06-07 17:17:31.984,"b""\xb5\x80\x0cCv\x1csPE*\xfa\xc6\xff\x01x\xc1e\xd3\xf2\x06""",2023-11-07 20:14:03 UTC,12031,2,208,4124,621,37


In [25]:
links_df.filter(pl.col("fid") == joao_fid)

fid,target_fid,hash,timestamp,created_at,updated_at,deleted_at,type,display_timestamp,id
i64,i64,binary,datetime[ns],datetime[ns],datetime[ns],datetime[ns],str,datetime[ns],i64
12031,2,"b""\xc3r@\x89\x1a-\x08\xe2HSR\x14t'%?*\x15{V""",2023-04-13 23:49:04,2023-09-05 23:18:56.247104,2023-09-05 23:18:56.247104,,"""follow""",,950143
12031,3,"b""\x08v\xa5\x97\xd8\xfa\xe1>\xb7\xe8a\x0a\xe1P\xaf\xd6\xcd\xa3\xcf\xf0""",2023-04-13 23:53:17,2023-09-05 23:18:56.247104,2023-09-05 23:18:56.247104,,"""follow""",,950144
12031,378,"b""\x0f\xdcd\xf2\xccJ\x98\xca\x80\xdaKT\xb0\xb0\xdbg\x12X\xd57""",2023-04-13 23:53:17,2023-09-05 23:18:56.247104,2023-09-05 23:18:56.247104,,"""follow""",,950145
12031,127,"b""\x14\x91\x92\xc7g\xa6\xeco\x86\xdd\xcc3\xfc.\xe7\xbdv!c\x06""",2023-04-13 23:53:17,2023-09-05 23:18:56.247104,2023-09-05 23:18:56.247104,,"""follow""",,950146
12031,145,"b""""\x8f\xc1\x87\xe4\xe9\x8cG\xe7gw]\x86\x84u\xd9Id\x94\xa6""",2023-04-13 23:53:17,2023-09-05 23:18:56.247104,2023-09-05 23:18:56.247104,,"""follow""",,950148
…,…,…,…,…,…,…,…,…,…
12031,155,"b""\xcc\xc6\xdc'\xc4\xd3`\x16\xf9\x1a\xf2\x9f%>\xdb\xe5\x17\x03\xb0\xcb""",2024-09-27 01:04:12,2024-09-27 01:04:13.112150,2024-09-27 01:04:13.112150,,"""follow""",,11876878174
12031,12256,"b""C\xc6\x81\xdf\x02r\x9e3\xeae\x81E\x81\x04EC\xb2\xb9\x1ea""",2024-09-27 05:40:54,2024-09-27 05:40:54.666807,2024-09-27 05:40:54.666807,,"""follow""",,11876933181
12031,2172,"b""\xd8\xbaH\xbdo\x83\xfc!\xef\x9a\xd5\xc3\x94\x95M\x1b\xd2\xa6\xfb\xa0""",2024-09-30 17:59:17,2024-09-30 17:59:17.354086,2024-09-30 17:59:17.354086,,"""follow""",,11877989030
12031,862100,"b""\x01,\x97\xce\xd0\x0aR]>\xc9\xa3\xee\xd2\xdd\xc8\xd9|\xc3G\xf9""",2024-10-15 19:51:28,2024-10-15 20:51:35.238333,2024-10-15 20:51:35.238333,,"""follow""",,11882366667


In [26]:
links_df.filter(pl.col("target_fid") == joao_fid)

fid,target_fid,hash,timestamp,created_at,updated_at,deleted_at,type,display_timestamp,id
i64,i64,binary,datetime[ns],datetime[ns],datetime[ns],datetime[ns],str,datetime[ns],i64
2,12031,"b""`!\xd6\x13\xda\x0f\x1ac<\xb5\xf6\xc0\x14\x80\x9b\x8e\x94\x18\xda\xb0""",2023-07-26 15:05:11,2023-09-05 23:07:30.968478,2023-09-05 23:07:30.968478,,"""follow""",,853
5,12031,"b""\x023\x128_\xb9\xcd%\x19nbn\xb3{\xe8\x0b\x01\x8b\x02\xf2""",2023-08-31 23:30:43,2023-09-05 23:07:31.962471,2023-09-05 23:07:31.962471,,"""follow""",,1362
15,12031,"b""\x1e\x86R\x99}k\x1c\xfc\x85x""~\x01\xb1\xa5\xff\x0a4\x94\x19""",2023-04-14 00:06:57,2023-09-05 23:07:34.344245,2023-09-05 23:07:34.344245,,"""follow""",,5172
17,12031,"b""!\x91z`\x8b\xb4\xff\xae\x09\x89\xbb\x86e\xce\xe6\xa9\xcfz\x0e\xca""",2023-04-13 23:59:52,2023-09-05 23:07:34.566618,2023-09-05 23:07:34.566618,,"""follow""",,5663
18,12031,"b""\xf1\xe1\xb0\x18F\x07\xb6\x16\xd7\xbe'\xde%\x1fz\x84\xedZjE""",2023-07-28 02:58:39,2023-09-05 23:07:34.741851,2023-09-05 23:07:34.741851,,"""follow""",,5912
…,…,…,…,…,…,…,…,…,…
834651,12031,"b""\xdb'>1\x82\xb8\xbd3\xf9\xa6V7\x8cW2\x10\xa2\x8e\x05\xe8""",2024-10-28 03:21:28,2024-10-28 03:21:29.352564,2024-10-28 03:21:29.352564,,"""follow""",,11884722203
835546,12031,"b""S\xce\x05\xa8\x90F\x9b\xbf8\x0d\x00\xd2\xd9\xac\xfe\x82\xed\xa0`\xb6""",2024-10-28 03:47:28,2024-10-28 03:47:28.449977,2024-10-28 03:47:28.449977,,"""follow""",,11884723388
836471,12031,"b""\xf2\xacN8\xc5\xa8:fE\xf0l\x09Q\x7f-\xa9\xe4\x0a\xa6D""",2024-10-28 03:56:24,2024-10-28 03:56:24.479156,2024-10-28 03:56:24.479156,,"""follow""",,11884723903
827924,12031,"b""\x90\x13d\x96\x06\xe5Lv""\xfa\xc4\x87\xd4\xa8\xdf\x8b\x86T\x95\xe2""",2024-10-28 04:40:21,2024-10-28 04:40:21.751806,2024-10-28 04:40:21.751806,,"""follow""",,11884727480


In [36]:
# should filter by the links where the deleted at is null

In [None]:
# add casts 

In [27]:
casts_lazy_df = pl.scan_parquet(f"{DATA_PATH}/raw/farcaster-casts-0-1730134800.parquet")

In [28]:
casts_count_lazy = (
    casts_lazy_df
    .filter(pl.col("deleted_at").is_null())  # Filter out deleted casts
    .group_by("fid")
    .agg([
        pl.count("id").alias("total_casts_count"),
        (pl.col("parent_hash").is_not_null() & (pl.col("parent_hash").is_not_null())).sum().alias("reply_casts_count"),
        (pl.col("parent_hash").is_null()).sum().alias("casts_count"),
    ])
)

In [29]:
casts_count_df = casts_count_lazy.collect()

In [30]:
casts_count_df

fid,total_casts_count,reply_casts_count,casts_count
i64,u32,u32,u32
470492,19,5,14
501652,22,2,20
616114,17,0,17
699445,1,0,1
476649,82,61,21
…,…,…,…
628785,11,0,11
441067,7405,5539,1866
477911,6,3,3
208666,1,1,0


In [32]:
df = pl.read_parquet(f"{DATA_PATH}/interim/aggregated_links_statistics.parquet")

In [33]:
# df = df.drop(["casts_count","casts_count_right"])
# df

In [33]:
casts_count_df.sample(10)

fid,total_casts_count,reply_casts_count,casts_count
i64,u32,u32,u32
281643,104,86,18
858812,4,1,3
194271,11,2,9
532705,19,11,8
544849,31,5,26
190548,1,1,0
658884,4,3,1
561588,13,0,13
206909,3,3,0
577342,19,0,19


In [34]:
result_df = df.join(casts_count_df, on="fid", how="left")
result_df = result_df.fill_null(0)

  result_df = df.join(casts_count_df, on="fid", how="left")


In [35]:
result_df.write_parquet(f"{DATA_PATH}/interim/aggregated_links_statistics.parquet")

In [21]:
# casts_by_fid = casts_lazy_df.filter((pl.col('fid')==400242) & (pl.col('deleted_at').is_null()))
# result = casts_by_fid.collect()

In [20]:
# result.filter(~pl.col("root_parent_url").str.contains("https://warpcast.com/~/channel"))

created_at,updated_at,deleted_at,timestamp,fid,hash,parent_hash,parent_fid,parent_url,text,embeds,mentions,mentions_positions,root_parent_hash,root_parent_url,id
datetime[ns],datetime[ns],datetime[ns],datetime[ns],i64,binary,binary,i64,str,str,str,str,str,binary,str,i64
2024-03-24 07:08:16.583831,2024-03-24 07:08:16.583831,,2024-03-24 07:08:17,400242,"b""|\xb7\x8d\xecx\x982\xd4`\x913\xdf\xbd\x1a\xb1(}\x85^(""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe with $DEGEN or ETH""","""[{""url"": ""https://toka.lol/collect/base:0x6b1ce2699455a096fb617a0d1850467c827979d5/2""}, {""castId"": {""fid"": 249927, ""hash"": {""data"": [100, 232, 88, 29, 234, 12, 108, 32, 172, 164, 63, 14, 191, 130, 10, 24, 243, 47, 217, 182], ""type"": ""Buffer""}}}]""","""[249927]""","""[17]""","b""|\xb7\x8d\xecx\x982\xd4`\x913\xdf\xbd\x1a\xb1(}\x85^(""","""https://warpcast.com/~/channel/toka""",963501345
2024-03-24 07:08:24.584160,2024-03-24 07:08:24.584160,,2024-03-24 07:08:25,400242,"b""\xa2\xa1\xeaP\xda\xff\xec\x80\xfd\x9f%\x0b\xc3\x9dG\xaeN\x8eN\xef""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe with $DEGEN or ETH""","""[{""url"": ""https://toka.lol/collect/base:0x82e30a63bccde3724877878a30c4977b52348198/1""}, {""castId"": {""fid"": 281295, ""hash"": {""data"": [249, 12, 49, 117, 91, 171, 69, 68, 160, 129, 113, 43, 196, 132, 220, 81, 130, 151, 32, 91], ""type"": ""Buffer""}}}]""","""[293263]""","""[17]""","b""\xa2\xa1\xeaP\xda\xff\xec\x80\xfd\x9f%\x0b\xc3\x9dG\xaeN\x8eN\xef""","""https://warpcast.com/~/channel/toka""",963505914
2024-03-24 07:09:02.292216,2024-03-24 07:09:02.292216,,2024-03-24 07:09:02,400242,"b""\xc8\xf7;\xa4\xa8Q\xf2\xb2\x9d\xc6\x87,-\xbd{9\xaaI\xa8\x0a""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe with $DEGEN or ETH""","""[{""url"": ""https://toka.lol/collect/base:0x3972af0ebfa3ffbfa0913243a5540c18308a9ca9/1""}, {""castId"": {""fid"": 292426, ""hash"": {""data"": [220, 53, 3, 253, 197, 220, 63, 206, 157, 135, 236, 152, 212, 18, 140, 241, 107, 26, 178, 128], ""type"": ""Buffer""}}}]""","""[5860]""","""[17]""","b""\xc8\xf7;\xa4\xa8Q\xf2\xb2\x9d\xc6\x87,-\xbd{9\xaaI\xa8\x0a""","""https://warpcast.com/~/channel/toka""",963515943
2024-03-24 07:19:48.723145,2024-03-24 07:19:48.723145,,2024-03-24 07:19:49,400242,"b""Y'\x935\xa4{\x85y\xb9Z\xe69\x1d!Y\x84\x95!\x1c\xdc""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe with $DEGEN or ETH""","""[{""url"": ""https://toka.lol/collect/base:0x3f07b910f892fc1d19547d63ced91719dc595e7a/1""}, {""castId"": {""fid"": 328795, ""hash"": {""data"": [58, 203, 248, 180, 206, 40, 55, 255, 56, 98, 206, 175, 252, 1, 216, 62, 33, 80, 196, 127], ""type"": ""Buffer""}}}]""","""[328795]""","""[17]""","b""Y'\x935\xa4{\x85y\xb9Z\xe69\x1d!Y\x84\x95!\x1c\xdc""","""https://warpcast.com/~/channel/toka""",963751753
2024-03-24 07:38:57.714247,2024-03-24 07:38:57.714247,,2024-03-24 07:38:58,400242,"b"":\xa1\xbc\x08\xb5k\xabl+=\xe6{<1\xb0\x0aZ\xe1r\x0f""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe with $DEGEN or ETH""","""[{""url"": ""https://toka.lol/collect/base:0x2d8f5a5530e6e8c0b53f2c39947cf0b3229cedeb/1""}, {""castId"": {""fid"": 236715, ""hash"": {""data"": [96, 190, 80, 213, 203, 203, 58, 43, 92, 175, 228, 192, 28, 9, 66, 60, 164, 45, 182, 249], ""type"": ""Buffer""}}}]""","""[236670]""","""[17]""","b"":\xa1\xbc\x08\xb5k\xabl+=\xe6{<1\xb0\x0aZ\xe1r\x0f""","""https://warpcast.com/~/channel/toka""",964171868
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2024-10-03 16:40:05.725472,2024-10-03 16:40:05.725472,,2024-10-03 16:40:06,400242,"b""\xe5\x8b\x90\x84)\xc8\x98\x1b\xd1\x80bd\xceY\xb6\xb0\xf3\x05}\xda""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe""","""[{""url"": ""https://toka.lol/collect/base:0x53dc7139cb12aaff78e9cea97fca9fcbdc08d8db/1""}, {""castId"": {""fid"": 218753, ""hash"": {""data"": [181, 116, 50, 46, 40, 200, 251, 73, 131, 171, 9, 164, 132, 255, 223, 223, 200, 29, 181, 189], ""type"": ""Buffer""}}}]""","""[415590]""","""[17]""","b""\xe5\x8b\x90\x84)\xc8\x98\x1b\xd1\x80bd\xceY\xb6\xb0\xf3\x05}\xda""","""https://warpcast.com/~/channel/toka""",5859307470
2024-10-03 16:41:34.950593,2024-10-03 16:41:34.950593,,2024-10-03 16:41:35,400242,"b""\xed=\x85\x89\xeb\x89\xe8*\x1d\xa6P\x83\xe6[\xc9\x1af\x87IA""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe""","""[{""url"": ""https://toka.lol/collect/zora:0xaaa3a3c3a1c03b3bc869d4a13d31a158ab49a323/1""}, {""castId"": {""fid"": 441555, ""hash"": {""data"": [55, 201, 4, 140, 131, 56, 21, 135, 178, 98, 145, 204, 125, 90, 230, 96, 74, 176, 229, 161], ""type"": ""Buffer""}}}]""","""[796850]""","""[17]""","b""\xed=\x85\x89\xeb\x89\xe8*\x1d\xa6P\x83\xe6[\xc9\x1af\x87IA""","""https://warpcast.com/~/channel/toka""",5859308158
2024-10-03 16:43:20.408571,2024-10-03 16:43:20.408571,,2024-10-03 16:43:20,400242,"b""\xf6v\xa7\x12\xcb\x1d\xcb\xa0\xf4\xbfm\xd5\xc2\x0a|\xb6ni\xa8o""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT completely onframe""","""[{""url"": ""https://toka.lol/collect/zora:0x3af1c9a4a51b3605beb963a99482deddaf143c66/44""}, {""castId"": {""fid"": 415950, ""hash"": {""data"": [221, 159, 167, 84, 133, 178, 177, 249, 48, 6, 26, 29, 172, 234, 108, 11, 132, 12, 180, 166], ""type"": ""Buffer""}}}]""","""[]""","""[]""","b""\xf6v\xa7\x12\xcb\x1d\xcb\xa0\xf4\xbfm\xd5\xc2\x0a|\xb6ni\xa8o""","""https://warpcast.com/~/channel/toka""",5859308927
2024-10-03 16:55:22.416695,2024-10-03 16:55:22.416695,,2024-10-03 16:55:22,400242,"b""\x1ar\xb7*\x1f\x1d\xab\xa3\x0c\xc9G\x00\xed_\xccV\xa7\xa4\xad\x88""",,,"""https://warpcast.com/~/channel/toka""","""Mint this NFT by completely onframe""","""[{""url"": ""https://toka.lol/collect/zora:0xbc4cd5ea817d5db5208d448cffdc1cb735f2c57a/1""}, {""castId"": {""fid"": 434522, ""hash"": {""data"": [50, 217, 44, 126, 22, 104, 228, 174, 42, 226, 235, 113, 32, 59, 208, 34, 247, 224, 39, 198], ""type"": ""Buffer""}}}]""","""[434522]""","""[17]""","b""\x1ar\xb7*\x1f\x1d\xab\xa3\x0c\xc9G\x00\xed_\xccV\xa7\xa4\xad\x88""","""https://warpcast.com/~/channel/toka""",5859314522


In [11]:
# b"\xed=\x85\x89\xeb\x89\xe8*\x1d\xa6P\x83\xe6[\xc9\x1af\x87IA"	.hex()

'ed3d8589eb89e82a1da65083e65bc91a66874941'

In [23]:
df_filtered = links_df.filter(
            (pl.col("deleted_at").is_null()) & 
            (pl.col("fid") != pl.col("target_fid")) & 
            (pl.col("type") == "follow")
        ).select([
            "fid", "target_fid"
        ])

In [24]:
mutual_links = df_filtered.join(
    df_filtered,
    left_on=["fid", "target_fid"],
    right_on=["target_fid", "fid"],
    suffix="_reverse"
)

KeyboardInterrupt: 

In [25]:
df_filtered = df_filtered.with_columns([
        pl.col("fid").cast(pl.Categorical),
        pl.col("target_fid").cast(pl.Categorical)
    ])

ComputeError: cannot cast numeric types to 'Categorical'

In [None]:
mutual_links = df_filtered.join(
        df_filtered,
        left_on=["fid", "target_fid"],
        right_on=["target_fid", "fid"],
        suffix="_reverse",
    )