In [42]:
import pandas as pd
import numpy as np

# Descriptive (non-numeric) amounts and the value substituted for each.
_DESCRIPTIVE_AMOUNTS = {
    '数百万': 5e6,  # "several million": assumed to be roughly 5 million
    '数千万': 5e7,  # "tens of millions": assumed to be roughly 50 million
    '未透露': np.nan,  # "undisclosed"
}

# Unit suffixes and their multipliers, tested in this order.
_AMOUNT_UNITS = (('亿', 1e8), ('万', 1e4))


def descriptive_to_numeric(amount):
    """Convert a financing amount given as a number or Chinese text to a number.

    Args:
        amount: A missing value, a real number, or a string such as
            ``'1.5亿'``, ``'300万'``, ``'数百万'``, ``'未透露'`` or ``'1234'``.

    Returns:
        ``np.nan`` for missing values, for ``'未透露'`` and for strings that
        cannot be parsed; the approximate value for the known descriptive
        phrases; the number scaled by 1e8 (``亿``) or 1e4 (``万``) for
        strings carrying a unit; the input itself for numeric inputs.

    Raises:
        TypeError: If ``amount`` is neither missing, a string, nor a number.
    """
    if pd.isnull(amount):
        return np.nan

    if isinstance(amount, str):
        text = amount.strip()

        # Exact phrases must be matched before the unit suffixes: '数百万'
        # also contains '万', and would otherwise be reduced to '数百',
        # fail float() and come back as NaN.
        if text in _DESCRIPTIVE_AMOUNTS:
            return _DESCRIPTIVE_AMOUNTS[text]

        multiplier = 1
        for unit, factor in _AMOUNT_UNITS:
            if unit in text:
                text = text.replace(unit, '')
                multiplier = factor
                break

        try:
            return float(text) * multiplier
        except ValueError:
            # Anything that is not "<number><optional unit>" is unknown.
            return np.nan

    # NumPy scalars are accepted too; np.int64 is not a subclass of int.
    if isinstance(amount, (int, float, np.integer, np.floating)):
        return amount

    raise TypeError(f"无法处理的金额类型: {type(amount)}")

# Multipliers that convert one unit of the named currency into RMB.
# NOTE(review): these are fixed values hard-coded here; confirm they suit the
# period the data covers.
exchange_rates = {
    '美元': 6.3,
    '人民币': 1,
    '港元': 0.8,
    # Add other currencies and their exchange rates as needed
}


def currency_to_cny(amount, currency):
    """Convert ``amount`` from ``currency`` into RMB using ``exchange_rates``.

    Args:
        amount: Numeric amount; NaN propagates through the multiplication.
        currency: Currency name as a key of ``exchange_rates``. A missing
            value is treated as ``'人民币'``.

    Returns:
        ``amount`` multiplied by the rate for ``currency``. A currency that
        is not in ``exchange_rates`` is converted at rate 1, and a warning
        is emitted so the unconverted amount does not go unnoticed.
    """
    import warnings  # local import so this block needs no file-level change

    if pd.isna(currency):
        currency = '人民币'

    if currency not in exchange_rates:
        # Keep the historical fallback (rate 1) but make it visible.
        warnings.warn(
            f"Unknown currency {currency!r}; amount left unconverted (rate 1).",
            stacklevel=2,
        )

    return amount * exchange_rates.get(currency, 1)

# Load the raw export.
df = pd.read_csv('企业服务-上市公司列表_310条.csv', encoding='utf-8')

# Turn the free-text amount column into numbers (NaN where it cannot be parsed).
df['金额'] = df['金额'].apply(descriptive_to_numeric)

# Convert every amount into RMB using the currency named on the same row.
# NOTE(review): the column is labelled "(万)", but descriptive_to_numeric
# expands '万'/'亿' into base units — confirm which unit is intended.
df['等值人民币(万)'] = df.apply(lambda row: currency_to_cny(row['金额'], row['币种']), axis=1)

# df.sort_values(by=['公司ID', '融资时间'], inplace=True)

company_info_columns = ['公司ID', '公司简称', '工商全称', '统一信用代码', '一句话简介',
                        '行业', '子行业', '省', '市', '地址', '成立时间', '运营状态',
                        '融资总额(万人民币)', '估值(万人民币)', '估值(估算-万人民币)',
                        '年营收(亿元)', '年度利润(亿元)', '员工人数']
# Carry the company-level fields down onto the rows below them.
# NOTE(review): presumably company info is only filled on a company's first
# row; a plain ffill also copies a value from the previous company when a
# company's own field is blank — verify against the data.
df[company_info_columns] = df[company_info_columns].ffill()

# Snapshot taken right after the forward-fill, before the date conversion.
df.to_csv('ffilled_企业服务-上市公司列表.csv', index=False)

# Values that cannot be parsed as dates become NaT instead of raising.
df['融资时间'] = pd.to_datetime(df['融资时间'], errors='coerce')

# Count financing events per company: rows that have a non-null round.
# Rows with a missing company ID are left out, as groupby drops NaN keys.
financing_count = (
    df.dropna(subset=['轮次'])
    .groupby('公司ID')
    .size()
    .reset_index(name='融资次数')
)
print(financing_count)

# Total financing amount per company (not enabled).
# financing_sum = df.groupby('公司ID')['等值人民币(万)'].sum().reset_index(name='融资总额')

# One row per distinct combination of company-level fields, plus the count.
company_overview = df[company_info_columns].drop_duplicates()
company_overview = pd.merge(company_overview, financing_count, on='公司ID', how='left')
# company_overview = pd.merge(company_overview, financing_sum, on='公司ID', how='left')

financing_history_columns = ['公司简称', '融资时间', '轮次', '金额', '币种',
                             '等值人民币(万)', '投资机构']
# Explicit copy so later edits to this table do not touch (or warn about) df.
financing_history = df[financing_history_columns].copy()

df.to_csv('processed_企业服务-上市公司列表.csv', index=False)
company_overview.to_csv('公司概况.csv', index=False)
financing_history.to_csv('融资历史.csv', index=False)

           公司ID  融资次数
0         227.0     5
1        1969.0     5
2        2428.0     5
3        3016.0     4
4        3151.0    10
..          ...   ...
103  42081108.0     1
104  42758180.0     3
105  42759306.0     2
106  42782781.0     1
107  42812312.0     5

[108 rows x 2 columns]
