In [23]:
import pandas as pd
import networkx as nx

# for the explode statement; convert string to list
from ast import literal_eval

# for community detection
import community as community_louvain

# Temporal Network

For temporal network visualization, this dataset is good enough for now. Ideally, I'd like to do bipartite projection and fetch the dates of each edge, but I'm struggling with that at the moment. To use Cosmograph's temporal capabilities, you need to have a "date" column. Just name it "date" and make sure it has temporal values.

In [24]:
data = r'data/arxiv_artificial_life_original.csv'

df = pd.read_csv(data)
df['authors'] = df['authors'].apply(literal_eval)

# remove everything that doesn't mention artificial life explicitely in the summary; too aggressive, but useful for now
mask = df['summary'].str.contains('artificial life', case=False)
df = df[mask]

df = df[['date_published', 'authors', 'title']]
df.columns = ['date', 'author', 'title']
df = df.explode('author')
df.reset_index(drop=True, inplace=True)

# fix the & so that scikit-network stops crashing; old legacy fix, probably not important for cosmograph
df['authors'] = df['author'].str.replace('&', 'and')
df['title'] = df['title'].str.replace('&', 'and')

df['date'] = pd.to_datetime(df['date'])

df.head()

Unnamed: 0,date,author,title,authors
0,2023-10-18 14:50:39+00:00,Alessandro Scirè,Emergence and criticality in spatiotemporal sy...,Alessandro Scirè
1,2023-10-13 11:58:18+00:00,Lakshwin Shreesha,Role of Morphogenetic Competency on Evolution,Lakshwin Shreesha
2,2023-06-22 19:32:53+00:00,M Charity,Amorphous Fortress: Observing Emergent Behavio...,M Charity
3,2023-06-22 19:32:53+00:00,Dipika Rajesh,Amorphous Fortress: Observing Emergent Behavio...,Dipika Rajesh
4,2023-06-22 19:32:53+00:00,Sam Earle,Amorphous Fortress: Observing Emergent Behavio...,Sam Earle


In [25]:
outfile = r'C:\Users\itsgo\PythonCode\100daysofnetworks\data\cosmograph\temporal_artificial_life.csv'

df.to_csv(outfile, index=False, header=True)

# Arxiv: Refreshed

I used the day 18 dataset builder to fetch some new data about Network Science and Artificial Intelligence. I will combine both of these datasets. The arxiv library seems a bit less reliable, now, and I'm not sure why. But it is able to pull datasets from Arxiv. Fetch smaller datasets, and smash them together.

What I'm doing:
- read csv data into indidual dataframes
- concatenate them together: smoosh them together
- rename the columns so that 'authors' is properly named as 'author', as each author will have it's own row
- also, "date_published" should be named as "date" to work with cosmograph
- save the combined data together

In [45]:
data_1 = r'C:\Users\itsgo\PythonCode\100daysofnetworks\data\arxiv_ai_20250118.csv'
data_2 = r'C:\Users\itsgo\PythonCode\100daysofnetworks\data\arxiv_network_science_20250118.csv'

df_1 = pd.read_csv(data_1)
df_2 = pd.read_csv(data_2)

df = pd.concat([df_1, df_2])
df = df[['date_published', 'authors', 'title']]
df['authors'] = df['authors'].apply(literal_eval)
df = df.explode('authors')
df.drop_duplicates(inplace=True)
df.columns = ['date', 'author', 'title']
df.head()

Unnamed: 0,date,author,title
0,2025-01-15 07:37:46+00:00,Hartmut Löwen,Towards Intelligent Active Particles
0,2025-01-15 07:37:46+00:00,Benno Liebchen,Towards Intelligent Active Particles
1,2025-01-13 16:28:01+00:00,Rolf Pfister,Understanding and Benchmarking Artificial Inte...
1,2025-01-13 16:28:01+00:00,Hansueli Jud,Understanding and Benchmarking Artificial Inte...
2,2025-01-13 10:23:14+00:00,Gwénolé Abgrall,Synthetic Data and Health Privacy


In [46]:
df.shape

(4180, 3)

In [47]:
outfile = r'C:\Users\itsgo\PythonCode\100daysofnetworks\data\cosmograph\temporal_ai_netsci.csv'

df.to_csv(outfile, index=False, header=True)