In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import os

# Input CSVs: one file per industry index; the file stem becomes the series label
file_paths = [
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/机器人.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/半导体.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/半导体材料.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/半导体设备.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/人工智能.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/计算机.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/软件开发.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/电子.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/自动化设备.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/通信设备.csv'
]

# The image folder is created (if needed) before any file is read
output_dir = '/Users/ash/Desktop/毕业/writer/output/AI研报/image'
os.makedirs(output_dir, exist_ok=True)

# Read each CSV in list order and attach the derived columns
data_frames = []
for csv_path in file_paths:
    # NOTE: read_csv raises FileNotFoundError as soon as one path is absent,
    # which aborts the whole cell.
    raw = pd.read_csv(csv_path)

    # 'trade_date' is stored as YYYYMMDD; parse it once and reuse the result
    parsed_dates = pd.to_datetime(raw['trade_date'], format='%Y%m%d')

    data_frames.append(
        raw.assign(
            trade_date=parsed_dates,
            year=parsed_dates.dt.year,
            month=parsed_dates.dt.month,
            day=parsed_dates.dt.day,
            # Industry label = file name with the '.csv' text removed
            industry=os.path.basename(csv_path).replace('.csv', ''),
        )
    )

# Stack the per-industry frames into one long table
combined_df = pd.concat(data_frames)

# Font file used to render the Chinese labels
font_path = "/Library/Fonts/SimHei.ttf"
font_prop = FontProperties(fname=font_path)

fig, ax = plt.subplots(figsize=(12, 8))

# One line per industry: total_mv against trade_date
for label, rows in combined_df.groupby('industry'):
    ax.plot(rows['trade_date'], rows['total_mv'], label=label)

# Axis labels, title and legend all use the Chinese-capable font
ax.set_xlabel('交易日期', fontproperties=font_prop)
ax.set_ylabel('总市值 (万元)', fontproperties=font_prop)
ax.set_title('申万行业指数总市值趋势', fontproperties=font_prop)
ax.legend(prop=font_prop)
plt.xticks(rotation=45)

# Write the figure to disk, then release it
output_path = os.path.join(output_dir, '申万行业指数总市值趋势.png')
fig.savefig(output_path, bbox_inches='tight', dpi=300)
plt.close()


FileNotFoundError: [Errno 2] No such file or directory: '/Users/ash/Desktop/毕业/writer/data/申万行业指数/人工智能.csv'

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import os

# Source CSVs, one per industry index; the file stem doubles as the legend label.
file_paths = [
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/机器人.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/半导体.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/半导体材料.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/半导体设备.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/人工智能.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/计算机.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/软件开发.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/电子.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/自动化设备.csv',
    '/Users/ash/Desktop/毕业/writer/data/申万行业指数/通信设备.csv'
]

# Create output directory if it doesn't exist
output_dir = '/Users/ash/Desktop/毕业/writer/output/AI研报/image'
os.makedirs(output_dir, exist_ok=True)


def _load_industry_frame(csv_path):
    """Read one industry CSV and return it prepared for plotting.

    The 'trade_date' column is parsed from YYYYMMDD into datetimes; 'year',
    'month' and 'day' columns are derived from it; an 'industry' column is set
    to the file stem; rows are sorted chronologically.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A new DataFrame with the columns described above and a fresh
        0..n-1 index.

    Raises:
        FileNotFoundError: If csv_path does not exist.
        ValueError: If a required column is missing, the file cannot be
            parsed, or a date does not match '%Y%m%d'.
    """
    frame = pd.read_csv(csv_path)

    # Validate here so a malformed file is skipped by the caller instead of
    # blowing up later, at plot time, outside of any error handling.
    missing = {'trade_date', 'total_mv'} - set(frame.columns)
    if missing:
        raise ValueError(f"missing required column(s): {sorted(missing)}")

    trade_date = pd.to_datetime(frame['trade_date'], format='%Y%m%d')

    # splitext strips only the extension; str.replace would also remove a
    # '.csv' occurring in the middle of the name.
    industry_name = os.path.splitext(os.path.basename(csv_path))[0]

    return (
        frame.assign(
            trade_date=trade_date,
            year=trade_date.dt.year,
            month=trade_date.dt.month,
            day=trade_date.dt.day,
            industry=industry_name,
        )
        # Line plots connect points in row order, so enforce chronological
        # order; a stable sort keeps the file order of same-day rows.
        .sort_values('trade_date', kind='stable')
        .reset_index(drop=True)
    )


# Load and preprocess each file, skipping the ones that cannot be used
data_frames = []
for file_path in file_paths:
    try:
        data_frames.append(_load_industry_frame(file_path))
    except FileNotFoundError:
        print(f"File not found, skipping: {file_path}")
    except (OSError, KeyError, TypeError, ValueError) as e:
        # Only I/O and data-shape problems are treated as "skip this file";
        # anything else (e.g. a NameError) is a bug and should surface.
        print(f"Error processing {file_path}: {e}")

# Check if we have any data to plot
if not data_frames:
    print("No valid data files found to process")
else:
    # Combine all data into one DataFrame for plotting; ignore_index avoids
    # duplicated row labels across the per-file frames.
    combined_df = pd.concat(data_frames, ignore_index=True)

    # Font file used to render the Chinese labels
    font_path = "/Library/Fonts/SimHei.ttf"
    font_prop = FontProperties(fname=font_path)

    fig, ax = plt.subplots(figsize=(12, 8))

    # Plot each industry's total_mv over time
    for industry, group in combined_df.groupby('industry'):
        ax.plot(group['trade_date'], group['total_mv'], label=industry)

    # Formatting the plot
    ax.set_xlabel('交易日期', fontproperties=font_prop)
    ax.set_ylabel('总市值 (万元)', fontproperties=font_prop)
    ax.set_title('申万行业指数总市值趋势', fontproperties=font_prop)
    ax.legend(prop=font_prop)
    ax.tick_params(axis='x', rotation=45)

    # Save the plot, then close this specific figure (not just "the current one")
    output_path = os.path.join(output_dir, '申万行业指数总市值趋势.png')
    fig.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.close(fig)
    print(f"Plot saved successfully to {output_path}")


File not found, skipping: /Users/ash/Desktop/毕业/writer/data/申万行业指数/人工智能.csv


Plot saved successfully to /Users/ash/Desktop/毕业/writer/output/AI研报/image/申万行业指数总市值趋势.png


In [3]:
# Restyled trend chart: each industry gets its own dash pattern and marker.
# Relies on combined_df and output_dir already being defined in the session.
font_path = "/Library/Fonts/SimHei.ttf"
font_prop = FontProperties(fname=font_path)

# Pools the series cycle through (4 dash patterns, 10 marker shapes)
line_styles = ['-', '--', '-.', ':']
markers = ['o', 's', '^', 'v', 'D', 'p', '*', 'h', 'x', '+']

fig, ax = plt.subplots(figsize=(14, 8))

for position, (industry, group) in enumerate(combined_df.groupby('industry')):
    # The modulo wraps around whenever there are more series than pool entries
    series_look = {
        'linestyle': line_styles[position % len(line_styles)],
        'marker': markers[position % len(markers)],
        'markersize': 4,
        'linewidth': 1.5,
    }
    ax.plot(group['trade_date'], group['total_mv'], label=industry, **series_look)

# Labels and title in the Chinese-capable font, with explicit sizes
ax.set_xlabel('交易日期', fontproperties=font_prop, fontsize=12)
ax.set_ylabel('总市值 (万元)', fontproperties=font_prop, fontsize=12)
ax.set_title('申万行业指数总市值趋势对比', fontproperties=font_prop, fontsize=14)

# Dashed, semi-transparent grid
ax.grid(True, linestyle='--', alpha=0.6)

# Legend anchored just outside the right edge of the axes
ax.legend(prop=font_prop, bbox_to_anchor=(1.05, 1), loc='upper left')

# Slanted, right-aligned date labels
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# Write the enhanced figure next to the basic one and release it
enhanced_output_path = os.path.join(output_dir, '申万行业指数总市值趋势_增强版.png')
fig.savefig(enhanced_output_path, bbox_inches='tight', dpi=300)
plt.close()

print(f"Enhanced plot saved successfully to {enhanced_output_path}")


Enhanced plot saved successfully to /Users/ash/Desktop/毕业/writer/output/AI研报/image/申万行业指数总市值趋势_增强版.png


In [4]:
# Verify the saved plots in the output directory, then render a "simple" variant.
# Relies on plt, combined_df and font_prop already being defined in the session.
import os
output_dir = '/Users/ash/Desktop/毕业/writer/output/AI研报/image'

# List all PNG files in the output directory whose name contains '申万行业指数'.
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# listing; a missing directory is treated as "nothing saved" rather than a crash.
if os.path.isdir(output_dir):
    saved_files = sorted(
        f for f in os.listdir(output_dir)
        if '申万行业指数' in f and f.endswith('.png')
    )
else:
    saved_files = []

print("Saved plot files containing '申万行业指数':")
for file in saved_files:
    print(f"- {file} (full path: {os.path.join(output_dir, file)})")

# Only create the additional version when at least one earlier plot exists
if saved_files:
    fig, ax = plt.subplots(figsize=(14, 8))

    # One line per industry: total_mv against trade_date
    for industry, group in combined_df.groupby('industry'):
        ax.plot(group['trade_date'], group['total_mv'], label=industry)

    ax.set_xlabel('交易日期', fontproperties=font_prop)
    ax.set_ylabel('总市值 (万元)', fontproperties=font_prop)
    ax.set_title('申万行业指数总市值趋势-简洁版', fontproperties=font_prop)

    # Legend anchored just outside the right edge of the axes
    ax.legend(prop=font_prop, bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()

    simple_output_path = os.path.join(output_dir, '申万行业指数总市值趋势_简洁版.png')
    fig.savefig(simple_output_path, bbox_inches='tight', dpi=300)

    # Close this specific figure instead of whichever one happens to be current
    plt.close(fig)
    print(f"\nAdditional simple version saved to: {simple_output_path}")


Saved plot files containing '申万行业指数':
- 申万行业指数总市值趋势.png (full path: /Users/ash/Desktop/毕业/writer/output/AI研报/image/申万行业指数总市值趋势.png)
- 申万行业指数总市值趋势_增强版.png (full path: /Users/ash/Desktop/毕业/writer/output/AI研报/image/申万行业指数总市值趋势_增强版.png)



Additional simple version saved to: /Users/ash/Desktop/毕业/writer/output/AI研报/image/申万行业指数总市值趋势_简洁版.png


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import os

# Font file used to render the Chinese labels, title and legend
font_path = "/Library/Fonts/SimHei.ttf"
font_prop = FontProperties(fname=font_path)

# Source CSVs, one per concept board; the file stem is used as the legend label
file_paths = [
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AIGC概念.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AIPC.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AI制药.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AI手机.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AI智能体.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AI眼镜.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AI芯片.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/AI语料.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/人工智能.csv',
    '/Users/ash/Desktop/毕业/writer/data/东方财富概念版块/多模态AI.csv'
]

# Create the output directory up front so a bad path fails before any
# loading or plotting work has been done.
output_dir = '/Users/ash/Desktop/毕业/writer/output/AI研报/image'
os.makedirs(output_dir, exist_ok=True)

# Processed frames keyed by file stem (insertion order follows file_paths)
processed_data = {}

for file_path in file_paths:
    # Skip absent inputs with a message instead of aborting the whole cell
    if not os.path.exists(file_path):
        print(f"File not found, skipping: {file_path}")
        continue

    # Read the CSV file
    df = pd.read_csv(file_path)

    # 'trade_date' is stored as YYYYMMDD; parse it once and reuse the result
    trade_date = pd.to_datetime(df['trade_date'], format='%Y%m%d')

    # assign() returns a new frame, so the raw frame is left untouched
    df_processed = df.assign(
        trade_date=trade_date,
        year=trade_date.dt.year,
        month=trade_date.dt.month,
        day=trade_date.dt.day,
    )

    # splitext strips only the extension; str.replace would also remove a
    # '.csv' occurring in the middle of the name.
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    processed_data[file_name] = df_processed

# Fail with a clear message rather than an opaque StopIteration further down
if not processed_data:
    raise FileNotFoundError("No valid data files found to process")

# Create a figure and axis for plotting
fig, ax = plt.subplots(figsize=(12, 6))

# One line per file: total_mv against trade_date
for file_name, df in processed_data.items():
    ax.plot(df['trade_date'], df['total_mv'], label=file_name)

# Set plot properties
ax.set_xlabel('交易日期', fontproperties=font_prop)
ax.set_ylabel('总市值 (万元)', fontproperties=font_prop)
ax.set_title('东方财富概念版块总市值趋势', fontproperties=font_prop)
ax.legend(prop=font_prop)
ax.tick_params(axis='x', rotation=45)

# Save the figure, then close this specific figure
output_path = os.path.join(output_dir, '东方财富概念版块总市值趋势.png')
fig.savefig(output_path, bbox_inches='tight', dpi=300)
plt.close(fig)

# Show the processed data for the first loaded file as an example
print("Processed data example (first file):")
processed_data[next(iter(processed_data))].head()


Processed data example (first file):


Unnamed: 0,name,trade_date,pct_change,total_mv,up_num,down_num,year,month,day
0,AIGC概念,2024-12-20,1.35,201853904.0,76,27,2024,12,20
1,AIGC概念,2024-12-21,1.35,201853904.0,76,27,2024,12,21
2,AIGC概念,2024-12-22,1.35,201853904.0,76,27,2024,12,22
3,AIGC概念,2024-12-23,-6.4,194201880.0,3,104,2024,12,23
4,AIGC概念,2024-12-24,0.07,196756819.2,59,44,2024,12,24


In [2]:
# Two-panel figure: total_mv trend (left) and a correlation heatmap (right),
# followed by summary statistics. Relies on processed_data, font_prop,
# output_dir, plt and os already being defined in the session.
import numpy as np

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))

# Left panel: one total_mv line per loaded file
for board_name, board_df in processed_data.items():
    ax1.plot(board_df['trade_date'], board_df['total_mv'], label=board_name)
ax1.set_title('东方财富概念版块总市值趋势', fontproperties=font_prop)
ax1.set_xlabel('交易日期', fontproperties=font_prop)
ax1.set_ylabel('总市值 (万元)', fontproperties=font_prop)
ax1.legend(prop=font_prop)
ax1.tick_params(axis='x', rotation=45)

# Right panel: pairwise correlation of the numeric columns of the first file only
first_key = next(iter(processed_data))
sample_df = processed_data[first_key]
numerical_cols = list(sample_df.select_dtypes(include=[np.number]).columns)
corr_matrix = sample_df[numerical_cols].corr()

# Colour scale pinned to [-1, 1]
im = ax2.imshow(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)

# One tick per column on both axes, labelled with the column names
tick_positions = np.arange(len(corr_matrix.columns))
ax2.set_xticks(tick_positions)
ax2.set_yticks(tick_positions)
ax2.set_xticklabels(corr_matrix.columns, rotation=45, fontproperties=font_prop)
ax2.set_yticklabels(corr_matrix.columns, fontproperties=font_prop)
fig.colorbar(im, ax=ax2)
ax2.set_title('数值特征相关性矩阵', fontproperties=font_prop)

# Write the combined figure to disk and release it
output_path = os.path.join(output_dir, '东方财富概念版块分析与相关性.png')
fig.savefig(output_path, bbox_inches='tight', dpi=300)
plt.close()

# Summary statistics for the same first-file frame
print("描述性统计 (以第一个文件为例):")
display(sample_df.describe())


描述性统计 (以第一个文件为例):


Unnamed: 0,trade_date,pct_change,total_mv,up_num,down_num,year,month,day
count,58,58.0,58.0,58.0,58.0,58.0,58.0,58.0
mean,2025-01-17 12:00:00,-0.147586,187543600.0,51.827586,57.706897,2024.793103,3.534483,15.896552
min,2024-12-20 00:00:00,-6.4,168375100.0,1.0,0.0,2024.0,1.0,1.0
25%,2025-01-03 06:00:00,-1.56,180906200.0,28.0,36.0,2025.0,1.0,8.0
50%,2025-01-17 12:00:00,-0.355,184219000.0,50.5,56.0,2025.0,1.0,15.0
75%,2025-01-31 18:00:00,1.35,195577400.0,76.0,81.75,2025.0,2.0,24.0
max,2025-02-15 00:00:00,7.37,211237700.0,112.0,110.0,2025.0,12.0,31.0
std,,2.84327,12163760.0,32.356641,32.559262,0.408619,4.381843,9.314561


In [3]:
# Report, for each expected plot file, whether it is present in the output directory
output_dir = '/Users/ash/Desktop/毕业/writer/output/AI研报/image'
saved_files = [
    '东方财富概念版块总市值趋势.png',
    '东方财富概念版块分析与相关性.png'
]

for plot_name in saved_files:
    plot_path = os.path.join(output_dir, plot_name)
    # Exactly one line is printed per expected file: success or warning
    message = (
        f"Plot successfully saved: {plot_path}"
        if os.path.exists(plot_path)
        else f"Warning: Plot not found at {plot_path}"
    )
    print(message)


Plot successfully saved: /Users/ash/Desktop/毕业/writer/output/AI研报/image/东方财富概念版块总市值趋势.png
Plot successfully saved: /Users/ash/Desktop/毕业/writer/output/AI研报/image/东方财富概念版块分析与相关性.png
