In [None]:
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the sentiment dataset from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='test_sentiment.csv')
# Print a heading, then the frame summary (column dtypes and non-null counts)
# so that missing values are easy to spot.
print('数据基本信息：')
df.info()

In [None]:
# Tally the rows per sentiment label; missing labels are excluded from the
# counts (dropna=True is the pandas default, spelled out here for clarity).
sentiment_distribution = df['sentiment'].value_counts(dropna=True)
# Emit the heading and the counts on separate lines in a single call.
print('情感标签分布情况：', sentiment_distribution, sep='\n')

In [None]:
# Parse the 'date' column into pandas datetimes so the .dt accessor can be
# used on it afterwards. The column is overwritten in place on `df`.
df['date'] = df['date'].pipe(pd.to_datetime)

In [None]:
# Build a month-by-label count table: rows are calendar months (monthly
# periods derived from 'date'), columns are sentiment labels, and
# month/label combinations that never occur are filled with 0.
monthly_sentiment = (
    df.groupby(df['date'].dt.to_period('M'))['sentiment']
    .value_counts()
    .unstack(fill_value=0)
)
# Emit the heading and the table on separate lines in a single call.
print('每月情感标签数量：', monthly_sentiment, sep='\n')

In [None]:
# Raise the rendering resolution. This is a global rcParams setting, so it
# also applies to any figure created after this cell.
plt.rcParams['figure.dpi'] = 300
# Two stacked panels: overall label distribution on top, monthly trend below.
fig, axes = plt.subplots(2, 1, figsize=(15, 12))
dist_ax, trend_ax = axes

# --- Panel 1: bar chart of the overall sentiment label distribution ---
sentiment_distribution.plot(kind='bar', ax=dist_ax)
dist_ax.set_title('Distribution of emotional labels')
dist_ax.set_xlabel('Emotional tags')
dist_ax.set_ylabel('Quantity')
dist_ax.tick_params(axis='x', rotation=0)

# Label each bar with its count. The label is anchored at the top of the bar
# and nudged up by a fixed 3 points, so the gap looks the same whatever the
# magnitude of the counts (a data-unit offset such as "+ 10" floats far above
# small bars and is negligible on very tall ones). pandas draws the bars at
# integer x positions 0..n-1, which is why the enumerate index is used as x.
for bar_pos, count in enumerate(sentiment_distribution):
    dist_ax.annotate(
        str(count),
        xy=(bar_pos, count),
        xytext=(0, 3),
        textcoords='offset points',
        ha='center',
        va='bottom',
    )

# --- Panel 2: line chart of per-month counts, one line per sentiment label ---
monthly_sentiment.plot(kind='line', marker='o', ax=trend_ax)
trend_ax.set_title('The number of emotional tags per month')
trend_ax.set_xlabel('Month and Year')
# The y axis measures how many labels occurred, so it is named like panel 1.
trend_ax.set_ylabel('Quantity')
trend_ax.legend(title='')

# Label every data point with its integer count, just above the marker.
# The x/y arrays are fetched once per line rather than once per point.
for line in trend_ax.lines:
    for x_pos, count in zip(line.get_xdata(), line.get_ydata()):
        trend_ax.text(x_pos, count, str(int(count)), ha='center', va='bottom')

# Let matplotlib adjust spacing so titles and labels do not overlap.
plt.tight_layout()

# Render the figure.
plt.show()