In [1]:
import pandas as pd  
import requests  
import json  
  
# Read the input CSV into a DataFrame.
file_path = 'items.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
  
# Look up one github_repo cell and total its OpenRank values for 2024-09..2025-08.
def get_2025_value(repo_url, session=None, timeout=30):
    """Sum a repository's monthly OpenRank values from 2024-09 through 2025-08.

    The repository's ``openrank.json`` is downloaded from
    ``https://oss.open-digger.cn/github/<owner>/<repo>/openrank.json`` and every
    ``YYYY-MM`` entry inside the window is added up.

    Args:
        repo_url: GitHub repository URL (or a bare ``owner/repo`` string).
            Missing values (``None`` / NaN) are passed through as ``None``.
        session: Optional object exposing ``get(url, timeout=...)`` (for
            example a ``requests.Session``) used instead of ``requests.get``,
            which allows connection reuse across many rows.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The float total of the in-window monthly values, or ``None`` when the
        input is missing, the download fails, the payload is not a JSON
        object, or no usable month falls inside the window.
    """
    if pd.isna(repo_url):
        return None

    # Reduce the URL to "owner/repo": drop everything up to "github.com/",
    # any query string or fragment, extra path segments such as "/tree/main",
    # and a trailing ".git". str() keeps a non-string cell from raising here.
    tail = str(repo_url).strip().split('github.com/')[-1]
    tail = tail.split('?', 1)[0].split('#', 1)[0]
    segments = [part for part in tail.split('/') if part]
    if len(segments) >= 2 and segments[1].endswith('.git'):
        segments[1] = segments[1][:-len('.git')]
    if len(segments) < 2 or not segments[1]:
        print(f'Error processing {repo_url}: cannot determine owner/repo')
        return None
    repo_path = f'{segments[0]}/{segments[1]}'

    download_url = f'https://oss.open-digger.cn/github/{repo_path}/openrank.json'
    http_get = requests.get if session is None else session.get

    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = http_get(download_url, timeout=timeout)
        response.raise_for_status()
        json_data = response.json()
    except Exception as e:
        # Best effort: report the failure and leave this row empty so the
        # remaining repositories are still processed.
        print(f'Error processing {repo_url}: {e}')
        return None

    if not isinstance(json_data, dict):
        return None

    total = 0
    count = 0
    for key, value in json_data.items():
        # Only keys shaped like "YYYY-MM" are monthly entries.
        if not isinstance(key, str) or len(key) != 7 or key[4] != '-':
            continue
        try:
            year, month = int(key[:4]), int(key[5:])
        except ValueError:
            # A malformed key must not discard the whole repository.
            continue
        if not 1 <= month <= 12:
            continue

        in_window = (year == 2024 and month >= 9) or (year == 2025 and month <= 8)
        if not in_window:
            continue

        try:
            total += float(value)
        except (ValueError, TypeError):
            continue
        count += 1

    # No usable month in the window means "no data", not a total of zero.
    return total if count > 0 else None
  
# Compute the per-repository value for every row and store it in a new column.
df['2025_value'] = df['github_repo'].apply(func=get_2025_value)

# Write the augmented table to a new CSV file, omitting the index column.
output_file_path = 'items_with_2025_value_2.csv'
df.to_csv(path_or_buf=output_file_path, index=False)

print(f'Processed file saved to {output_file_path}')

Error processing https://github.com/graphcore/poptorch: 404 Client Error: Not Found for url: https://oss.open-digger.cn/github/graphcore/poptorch/openrank.json
Error processing https://github.com/naver/roma: 404 Client Error: Not Found for url: https://oss.open-digger.cn/github/naver/roma/openrank.json
Error processing https://github.com/TorchDrift/TorchDrift: 404 Client Error: Not Found for url: https://oss.open-digger.cn/github/TorchDrift/TorchDrift/openrank.json
Processed file saved to items_with_2025_value_2.csv


In [3]:
import csv

def count_non_empty_github_repos(input_file):
    """Print how many rows of a CSV file have a non-blank ``github_repo`` value.

    Args:
        input_file: Path of the CSV file to inspect. Its first row is used as
            the header.

    Returns:
        None. The count is printed; if the file has no ``github_repo`` column
        (including the case of a completely empty file) an error message is
        printed instead.
    """
    # 'utf-8-sig' decodes plain UTF-8 as well, but also drops a leading BOM so
    # the first header is not read as '\ufeffgithub_repo'.
    with open(input_file, mode='r', newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.DictReader(csvfile)

        # fieldnames is None when the file has no header row at all.
        if 'github_repo' not in (reader.fieldnames or ()):
            print("错误：CSV文件中没有'github_repo'列")
            return

        # Rows shorter than the header yield None for the missing columns,
        # so treat None like an empty string before stripping whitespace.
        count = sum(1 for row in reader if (row['github_repo'] or '').strip())

    print(f"github_repo列中非空值的数量: {count}")

# Example: report the count for the items table.
input_csv = 'items.csv'
count_non_empty_github_repos(input_file=input_csv)

github_repo列中非空值的数量: 119
