In [None]:
import networkx as nx
import pandas as pd
import requests
import matplotlib.pyplot as plt
import json
from datetime import datetime

# Get the data

This section retrieves data from GitHub using the GitHub API. The main goal is to collect information about repositories written in the Rust programming language that were created on or after April 1, 2024, as well as the users who have shown interest in them by starring them.

## Get recent repos

In [None]:
def get_recent_repositories():
    """
    Fetches the most-starred Rust repositories created on or after April 1, 2024.

    Sends a single request without authentication headers to the GitHub
    repository search endpoint, asking for the first 50 results sorted by
    star count in descending order.

    Returns:
        list | None: The repository dictionaries from the ``items`` field of the
        response (an empty list if that field is missing), or None when the
        request raises a network error or the API answers with a non-200 status.
    """
    url = 'https://api.github.com/search/repositories'

    params = {
        'q': 'created:>=2024-04-01 language:rust',
        'sort': 'stars',
        'order': 'desc',
        'per_page': 50
    }

    try:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as error:
        print(f"Failed to fetch repositories: {error}")
        return None

    if response.status_code != 200:
        # Include the status code so rate limiting (403/429) can be told apart from other failures.
        print(f"Failed to fetch repositories. Status Code: {response.status_code}")
        return None

    return response.json().get('items', [])

In [None]:
# Fetch the repository list; the result is None if the fetch failed.
repositories = get_recent_repositories()

In [None]:
# Build a table with one row per repository dictionary.
df = pd.DataFrame(repositories)

In [None]:
# Preview the first rows of the repository table.
df.head()

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,allow_forking,is_template,web_commit_signoff_required,topics,visibility,forks,open_issues,watchers,default_branch,score
0,786035979,R_kgDOLtn1Cw,ore-miner,tonyke-bot/ore-miner,False,"{'login': 'tonyke-bot', 'id': 14999583, 'node_...",https://github.com/tonyke-bot/ore-miner,ORE Miner built on top of Jito bundle with bot...,False,https://api.github.com/repos/tonyke-bot/ore-miner,...,True,False,False,[],public,212,19,461,main,1.0
1,782547300,R_kgDOLqS5ZA,ClangQL,AmrDeveloper/ClangQL,False,"{'login': 'AmrDeveloper', 'id': 23631699, 'nod...",https://github.com/AmrDeveloper/ClangQL,ClangQL is a tool that allow you to run SQL-l...,False,https://api.github.com/repos/AmrDeveloper/ClangQL,...,True,False,False,"[ast, clang, cpp, database, gitql, llvm, llvm-...",public,8,7,428,master,1.0
2,793955649,R_kgDOL1LNQQ,offset-allocator,pcwalton/offset-allocator,False,"{'login': 'pcwalton', 'id': 157897, 'node_id':...",https://github.com/pcwalton/offset-allocator,A port of Sebastian Aaltonen's `OffsetAllocato...,False,https://api.github.com/repos/pcwalton/offset-a...,...,True,False,False,[],public,5,1,351,main,1.0
3,780688305,R_kgDOLohbsQ,good_training_language,tsoding/good_training_language,False,"{'login': 'tsoding', 'id': 18597647, 'node_id'...",https://github.com/tsoding/good_training_language,Хороший Учебный Язык,False,https://api.github.com/repos/tsoding/good_trai...,...,True,False,False,[],public,5,2,317,main,1.0
4,780764359,R_kgDOLomExw,nix-inspect,bluskript/nix-inspect,False,"{'login': 'bluskript', 'id': 52386117, 'node_i...",https://github.com/bluskript/nix-inspect,Interactive tui for inspecting nix configs,False,https://api.github.com/repos/bluskript/nix-ins...,...,True,False,False,[],public,2,5,244,main,1.0


In [None]:
# List the distinct values of the 'language' column under a heading.
print("\nUnique Languages:", df['language'].unique(), sep="\n")


Unique Languages:
['Rust']


In [None]:
from tabulate import tabulate

# Five-number summary of the star counts: minimum, the three quartiles, maximum.
# Each value is wrapped in a one-element list so the summary becomes a single-row table.
stats = {
    'Minimum number of stars': [df['stargazers_count'].min()],
    **{
        label: [df['stargazers_count'].quantile(fraction)]
        for label, fraction in (
            ('Q1 (25th percentile)', 0.25),
            ('Median (Q2)', 0.5),
            ('Q3 (75th percentile)', 0.75),
        )
    },
    'Maximum number of stars': [df['stargazers_count'].max()],
}

stats_df = pd.DataFrame(stats)
print(tabulate(stats_df, headers='keys', tablefmt='pretty', showindex=False))

+-------------------------+----------------------+-------------+----------------------+-------------------------+
| Minimum number of stars | Q1 (25th percentile) | Median (Q2) | Q3 (75th percentile) | Maximum number of stars |
+-------------------------+----------------------+-------------+----------------------+-------------------------+
|          26.0           |         31.0         |    42.5     |        111.5         |          461.0          |
+-------------------------+----------------------+-------------+----------------------+-------------------------+


In [None]:
import csv
import json

def save_to_csv_full(repositories, filename):
    """
    Saves full repository data to a CSV file.

    The header contains every key that appears in any repository dictionary,
    in the order the keys are first encountered, so the column order is the
    same on every run. Keys missing from a given repository are written as
    empty cells. Nested values (dicts, lists) are written as their string
    representation.

    Args:
        repositories (list): List of dictionaries containing repository information.
        filename (str): Name of the CSV file to save.

    Raises:
        TypeError: If ``repositories`` is not iterable (for example None after a
            failed fetch). This is raised before the file is opened, so an
            existing file is not truncated.
    """
    # Materialise first so the input is validated (and iterated safely twice)
    # before the target file is opened for writing.
    repositories = list(repositories)

    # dict.fromkeys keeps first-seen order; a set would give an arbitrary column order.
    fieldnames = list(dict.fromkeys(key for repo in repositories for key in repo))

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(repositories)

def save_to_json_full(repositories, filename):
    """
    Write the complete repository records to disk as indented JSON.

    The target file is created or overwritten, opened as UTF-8 text, and the
    data is serialised with a four-space indent.

    Args:
        repositories (list): Repository dictionaries to serialise.
        filename (str): Path of the JSON file to write.
    """
    with open(filename, mode='w', encoding='utf-8') as output_handle:
        json.dump(repositories, fp=output_handle, indent=4)

In [None]:
# Persist the fetched repository data in both formats; the relative
# file names place the files in the current working directory.
save_to_csv_full(repositories, 'recent_repositories_from_april.csv')
save_to_json_full(repositories, 'recent_repositories_from_april.json')

In [None]:
import shutil

def move_files_to_data_folder(data_folder_path='/content/drive/MyDrive/PMF 2 4/STROJNO + MREŽE/Data'):
    """
    Moves the exported repository CSV and JSON files into the data folder.

    The two files are looked up by their relative names in the current working
    directory. Errors are reported with a printed message instead of being
    raised, and each file is handled independently, so a problem with one file
    does not prevent the other from being moved.

    Args:
        data_folder_path (str): Destination directory. Defaults to the project
            data folder on Google Drive. It is created if it does not exist.
    """
    import os  # local import: the surrounding cell only imports shutil

    files_to_move = (
        ('CSV', 'recent_repositories_from_april.csv'),
        ('JSON', 'recent_repositories_from_april.json'),
    )

    try:
        # If the destination did not exist, shutil.move would rename the first
        # file to that path and the second move would then overwrite it.
        os.makedirs(data_folder_path, exist_ok=True)
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    for label, file_path in files_to_move:
        try:
            shutil.move(file_path, data_folder_path)
            print(f"{label} file moved successfully to 'data' folder.")
        except Exception as e:
            print(f"An error occurred: {e}")

In [None]:
# Move the two exported repository files from the working directory into the data folder.
move_files_to_data_folder()

CSV file moved successfully to 'data' folder.
JSON file moved successfully to 'data' folder.


## Get stargazers for recent repos

In [None]:
import pandas as pd
import requests
import json

In [None]:
# Reload the repository table from the CSV copy in the Drive data folder (rebinds `df`).
df = pd.read_csv('/content/drive/MyDrive/PMF 2 4/STROJNO + MREŽE/Data/recent_repositories_from_april.csv')

# Preview the first rows of the reloaded table.
df.head()

Unnamed: 0,merges_url,issue_comment_url,teams_url,branches_url,size,node_id,fork,forks,commits_url,languages_url,...,disabled,full_name,milestones_url,issue_events_url,contents_url,pushed_at,language,issues_url,license,archive_url
0,https://api.github.com/repos/tonyke-bot/ore-mi...,https://api.github.com/repos/tonyke-bot/ore-mi...,https://api.github.com/repos/tonyke-bot/ore-mi...,https://api.github.com/repos/tonyke-bot/ore-mi...,65,R_kgDOLtn1Cw,False,212,https://api.github.com/repos/tonyke-bot/ore-mi...,https://api.github.com/repos/tonyke-bot/ore-mi...,...,False,tonyke-bot/ore-miner,https://api.github.com/repos/tonyke-bot/ore-mi...,https://api.github.com/repos/tonyke-bot/ore-mi...,https://api.github.com/repos/tonyke-bot/ore-mi...,2024-04-16T02:16:07Z,Rust,https://api.github.com/repos/tonyke-bot/ore-mi...,,https://api.github.com/repos/tonyke-bot/ore-mi...
1,https://api.github.com/repos/AmrDeveloper/Clan...,https://api.github.com/repos/AmrDeveloper/Clan...,https://api.github.com/repos/AmrDeveloper/Clan...,https://api.github.com/repos/AmrDeveloper/Clan...,1657,R_kgDOLqS5ZA,False,8,https://api.github.com/repos/AmrDeveloper/Clan...,https://api.github.com/repos/AmrDeveloper/Clan...,...,False,AmrDeveloper/ClangQL,https://api.github.com/repos/AmrDeveloper/Clan...,https://api.github.com/repos/AmrDeveloper/Clan...,https://api.github.com/repos/AmrDeveloper/Clan...,2024-05-09T01:47:39Z,Rust,https://api.github.com/repos/AmrDeveloper/Clan...,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",https://api.github.com/repos/AmrDeveloper/Clan...
2,https://api.github.com/repos/pcwalton/offset-a...,https://api.github.com/repos/pcwalton/offset-a...,https://api.github.com/repos/pcwalton/offset-a...,https://api.github.com/repos/pcwalton/offset-a...,26,R_kgDOL1LNQQ,False,5,https://api.github.com/repos/pcwalton/offset-a...,https://api.github.com/repos/pcwalton/offset-a...,...,False,pcwalton/offset-allocator,https://api.github.com/repos/pcwalton/offset-a...,https://api.github.com/repos/pcwalton/offset-a...,https://api.github.com/repos/pcwalton/offset-a...,2024-05-02T17:54:01Z,Rust,https://api.github.com/repos/pcwalton/offset-a...,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",https://api.github.com/repos/pcwalton/offset-a...
3,https://api.github.com/repos/tsoding/good_trai...,https://api.github.com/repos/tsoding/good_trai...,https://api.github.com/repos/tsoding/good_trai...,https://api.github.com/repos/tsoding/good_trai...,2299,R_kgDOLohbsQ,False,5,https://api.github.com/repos/tsoding/good_trai...,https://api.github.com/repos/tsoding/good_trai...,...,False,tsoding/good_training_language,https://api.github.com/repos/tsoding/good_trai...,https://api.github.com/repos/tsoding/good_trai...,https://api.github.com/repos/tsoding/good_trai...,2024-04-07T13:18:19Z,Rust,https://api.github.com/repos/tsoding/good_trai...,,https://api.github.com/repos/tsoding/good_trai...
4,https://api.github.com/repos/bluskript/nix-ins...,https://api.github.com/repos/bluskript/nix-ins...,https://api.github.com/repos/bluskript/nix-ins...,https://api.github.com/repos/bluskript/nix-ins...,207,R_kgDOLomExw,False,2,https://api.github.com/repos/bluskript/nix-ins...,https://api.github.com/repos/bluskript/nix-ins...,...,False,bluskript/nix-inspect,https://api.github.com/repos/bluskript/nix-ins...,https://api.github.com/repos/bluskript/nix-ins...,https://api.github.com/repos/bluskript/nix-ins...,2024-04-19T02:21:13Z,Rust,https://api.github.com/repos/bluskript/nix-ins...,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",https://api.github.com/repos/bluskript/nix-ins...


In [None]:
# Load the full repository records (including the nested 'owner' data) from the JSON export.
# The encoding is stated explicitly to match how the file was written, instead of
# relying on the platform's default text encoding.
with open('/content/drive/MyDrive/PMF 2 4/STROJNO + MREŽE/Data/recent_repositories_from_april.json', 'r', encoding='utf-8') as file:
    repository_data = json.load(file)

In [None]:
def stargazers_for_recent_repos(repo_owner, repo_name, api_token):
    """
    Fetches all stargazers of a GitHub repository, handling pagination.

    Pages are requested one after another, starting at page 1, until a
    response no longer advertises a 'next' link. Each page asks for 100
    entries to keep the number of requests low.

    Args:
        repo_owner (str): The username of the repository owner.
        repo_name (str): The name of the repository.
        api_token (str): GitHub API token, sent in the Authorization header.

    Returns:
        list | None: One dictionary per stargazer with the keys
        'repository_name', 'username' and 'user_id' (an empty list if the
        repository has no stargazers). None if any page request raises a
        network error or returns a non-200 status; stargazers collected from
        earlier pages are discarded in that case.
    """
    url = f'https://api.github.com/repos/{repo_owner}/{repo_name}/stargazers'
    headers = {"Authorization": f"token {api_token}"}
    page_size = 100

    stargazers = []
    next_page = 1

    while next_page:
        try:
            # Without a timeout a stalled connection would block the notebook indefinitely.
            response = requests.get(
                url,
                headers=headers,
                params={"page": next_page, "per_page": page_size},
                timeout=30,
            )
        except requests.RequestException as error:
            print(f"Failed to fetch stargazers for {repo_name}. Error: {error}")
            return None

        if response.status_code != 200:
            print(f"Failed to fetch stargazers for {repo_name}. Status Code: {response.status_code}")
            return None

        stargazers.extend(
            {'repository_name': repo_name, 'username': user.get('login', ''), 'user_id': user.get('id', '')}
            for user in response.json()
        )

        # The parsed Link header contains a 'next' entry only while more pages exist.
        next_page = next_page + 1 if 'next' in response.links else None

    print(f"Number of fetched stargazers for {repo_name}: {len(stargazers)}")

    return stargazers

In [None]:
# Start with an empty stargazer table that has the three expected columns.
all_stargazers_df_recent_repos = pd.DataFrame(columns=['repository_name', 'username', 'user_id'])

In [None]:
import os

# Read the token once, outside the loop. Prefer the GITHUB_TOKEN environment variable so a
# real credential never has to be written into the notebook; the placeholder is the fallback.
api_token = os.environ.get("GITHUB_TOKEN", "YOUR_TOKEN")

# Collect the records in a plain list and build the DataFrame once at the end.
# Concatenating inside the loop copies the growing frame on every iteration and
# made a re-run of this cell append every stargazer a second time.
stargazer_records = []

# Iterate through the repositories in the JSON data and fetch stargazers
for repo in repository_data:
    owner = repo['owner']['login']
    name = repo['name']

    stargazers = stargazers_for_recent_repos(owner, name, api_token)

    # None signals a failed fetch; an empty list is a valid result, not a failure.
    if stargazers is None:
        print(f"Failed to fetch stargazers data for {name}.")
        continue

    print(f"Stargazers data for {name} fetched successfully.")
    stargazer_records.extend(stargazers)

# Passing the column list keeps the expected columns even when nothing was fetched.
all_stargazers_df_recent_repos = pd.DataFrame(
    stargazer_records, columns=['repository_name', 'username', 'user_id']
)

Number of fetched stargazers for ore-miner: 461
Stargazers data for ore-miner fetched successfully.
Number of fetched stargazers for ClangQL: 428
Stargazers data for ClangQL fetched successfully.
Number of fetched stargazers for offset-allocator: 351
Stargazers data for offset-allocator fetched successfully.
Number of fetched stargazers for good_training_language: 316
Stargazers data for good_training_language fetched successfully.
Number of fetched stargazers for nix-inspect: 243
Stargazers data for nix-inspect fetched successfully.
Number of fetched stargazers for ore-cli-gpu: 230
Stargazers data for ore-cli-gpu fetched successfully.
Number of fetched stargazers for jailbreak-11: 176
Stargazers data for jailbreak-11 fetched successfully.
Number of fetched stargazers for haystackdb: 153
Stargazers data for haystackdb fetched successfully.
Number of fetched stargazers for three_body: 150
Stargazers data for three_body fetched successfully.
Number of fetched stargazers for binary_greedy

In [None]:
# Summarise the row count, columns and dtypes of the collected stargazer table.
all_stargazers_df_recent_repos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4497 entries, 0 to 4496
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   repository_name  4497 non-null   object
 1   username         4497 non-null   object
 2   user_id          4497 non-null   object
dtypes: object(3)
memory usage: 105.5+ KB


In [None]:
# Write the stargazer table to the current working directory, without the index column.
all_stargazers_df_recent_repos.to_csv('all_stargazers_for_recent_repos.csv', index=False)
print("Stargazers data saved to CSV file.")

# orient='records' with lines=True writes one JSON object per line (JSON Lines), not a single JSON array.
all_stargazers_df_recent_repos.to_json('all_stargazers_for_recent_repos.json', orient='records', lines=True)
print("Stargazers data saved to JSON file.")

Stargazers data saved to CSV file.
Stargazers data saved to JSON file.


In [None]:
folder_path = '/content/drive/MyDrive/PMF 2 4/STROJNO + MREŽE/Data'

# Join folder and file name with an explicit separator. Plain string concatenation
# produced '.../Dataall_stargazers_for_recent_repos.csv', i.e. files written next to
# the Data folder instead of inside it.
all_stargazers_df_recent_repos.to_csv(f"{folder_path}/all_stargazers_for_recent_repos.csv", index=False)
all_stargazers_df_recent_repos.to_json(f"{folder_path}/all_stargazers_for_recent_repos.json", orient='records', lines=True)

print("Files saved to Google Drive.")

Files saved to Google Drive.
