In [47]:
# Network identifiers to fetch from networks.skewed.de, grouped first by
# 'offline' / 'online' and then by category.
# An entry of the form "<name>__<net>" is split on "__" by download_graphs into
# the dataset name and the network inside that dataset; an entry without "__"
# is used for both parts of the URL.
data_dict = {
    'offline': {
        "school_friendship": [
            "spanish_highschools__1",
            "spanish_highschools__2",
            "spanish_highschools__6",
            "spanish_highschools__11_10",
            "spanish_highschools__11_9",
        ],
        "school_contact": [
            "sp_high_school_new__2011",
            "sp_high_school_new__2012",
            "sp_primary_school",
        ],
        'email': [
            "email_enron",
            "uni_email",
            "email_eu",
            "dnc"
        ]
    },
    'online': {
        'facebook': [
            "facebook_wall",
            "facebook_organizations__L1",
            "facebook_organizations__L2",
            "facebook_organizations__M1",
            "ego_social__facebook_107",
            "ego_social__facebook_1912",
            "ego_social__facebook_combined",
        ],
        'google_plus': [
            "ego_social__gplus_101133961721621664586",
            "ego_social__gplus_100500197140377336562",
            # NOTE(review): the next entry is identical to the first entry of
            # this list — was a different ego network intended here? TODO confirm.
            "ego_social__gplus_101133961721621664586",
            "ego_social__gplus_114336431216099933033",
        ],
        'twitter': [
            "twitter_15m",
            "twitter",
        ],
    },
}

In [48]:
import requests
import os
import zstandard as zstd

In [49]:
def decompress_zst(input_path, output_path=None):
    """Decompress a Zstandard-compressed file to disk.

    Args:
        input_path: Path of the compressed file to read.
        output_path: Path to write the decompressed bytes to. When omitted,
            the last extension of ``input_path`` is stripped
            (``graph.gt.zst`` -> ``graph.gt``).

    Returns:
        The path the decompressed data was written to.

    Raises:
        ValueError: If the destination is the same file as the input, which
            happens for example when ``input_path`` has no extension and
            ``output_path`` is omitted.
    """
    if output_path is None:
        output_path = os.path.splitext(input_path)[0]  # removes .zst

    # The destination is opened with 'wb', which truncates it. If it were the
    # input file itself, the compressed data would be destroyed before it is
    # read, so refuse up front.
    if os.path.abspath(output_path) == os.path.abspath(input_path):
        raise ValueError(
            f"Output path must differ from input path: {input_path}"
        )

    with open(input_path, 'rb') as compressed:
        dctx = zstd.ZstdDecompressor()
        output = open(output_path, 'wb')
        try:
            with output:
                dctx.copy_stream(compressed, output)
        except BaseException:
            # Do not leave a truncated file behind that could later be
            # mistaken for a complete decompressed graph.
            if os.path.exists(output_path):
                os.remove(output_path)
            raise

    print(f"Decompressed to: {output_path}")
    return output_path

def _download_file(url, dest_path, timeout):
    """Stream ``url`` to ``dest_path``, writing via a temporary ``.part`` file."""
    tmp_path = dest_path + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Only a fully written file is moved to its final name, so an
        # interrupted transfer is never mistaken for a finished download.
        os.replace(tmp_path, dest_path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def download_graphs(network_names, save_dir="downloads", timeout=30):
    """Download ``.gt.zst`` graph files from networks.skewed.de and decompress them.

    Each entry of ``network_names`` is either ``"<name>__<net>"`` (dataset
    name and network within it) or a plain ``"<name>"`` that is used for both
    parts of the URL. Files are stored as ``<save_dir>/<full_name>.gt.zst``
    and decompressed next to it with the ``.zst`` suffix removed.

    Args:
        network_names: Iterable of network identifiers.
        save_dir: Directory to store the files in; created if missing.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        List of paths of the compressed files that are present on disk after
        the call (already existing or freshly downloaded). Networks whose
        download failed are left out.
    """
    os.makedirs(save_dir, exist_ok=True)

    base_url = "https://networks.skewed.de/net/{name}/files/{net}.gt.zst"
    file_paths = []
    for full_name in network_names:
        if "__" in full_name:
            parts = full_name.split("__")
            name, net = parts[0], parts[1]
        else:
            name = net = full_name

        url = base_url.format(name=name, net=net)
        file_path = os.path.join(save_dir, f"{full_name}.gt.zst")
        # Same default destination decompress_zst derives: drop the ".zst".
        graph_path = os.path.splitext(file_path)[0]

        if os.path.exists(file_path):
            print(f"✔ {file_path} already exists, skipping download.")
            # Still decompress if an earlier run left only the archive.
            needs_decompress = not os.path.exists(graph_path)
        else:
            try:
                print(f"Downloading {full_name}...")
                _download_file(url, file_path, timeout)
                print(f"✔ Downloaded {full_name} to {file_path}")
            except requests.exceptions.RequestException as e:
                print(f"✘ Failed to download {full_name}: {e}")
                # Nothing usable on disk: do not record or decompress it.
                continue
            needs_decompress = True

        file_paths.append(file_path)

        if needs_decompress:
            try:
                decompress_zst(file_path, graph_path)
            except Exception as e:
                print(f"✘ Failed to decompress {file_path}: {e}")

    return file_paths




# Destination folder for the downloaded graphs, relative to this notebook.
target_dir = os.path.join("..", "..", "DATA", "netzschleuder")

# Flatten data_dict (group -> category -> names) into one list of names,
# keeping the order in which they are declared.
network_list = [
    network
    for group in data_dict.values()
    for networks in group.values()
    for network in networks
]

# Trailing semicolon keeps the cell from echoing the call's return value.
download_graphs(network_list, target_dir);


✔ ../../DATA/netzschleuder/spanish_highschools__1.gt.zst already exists, skipping download.
✔ ../../DATA/netzschleuder/spanish_highschools__2.gt.zst already exists, skipping download.
✔ ../../DATA/netzschleuder/spanish_highschools__6.gt.zst already exists, skipping download.
Downloading spanish_highschools__11_10...
✔ Downloaded spanish_highschools__11_10 to ../../DATA/netzschleuder/spanish_highschools__11_10.gt.zst
Decompressed to: ../../DATA/netzschleuder/spanish_highschools__11_10.gt
Downloading spanish_highschools__11_9...
✔ Downloaded spanish_highschools__11_9 to ../../DATA/netzschleuder/spanish_highschools__11_9.gt.zst
Decompressed to: ../../DATA/netzschleuder/spanish_highschools__11_9.gt
Downloading sp_high_school_new__2011...
✔ Downloaded sp_high_school_new__2011 to ../../DATA/netzschleuder/sp_high_school_new__2011.gt.zst
Decompressed to: ../../DATA/netzschleuder/sp_high_school_new__2011.gt
Downloading sp_high_school_new__2012...
✔ Downloaded sp_high_school_new__2012 to ../../D