# In [None]:
import pandas as pd
import numpy as np

class MediaMatrix:
    """Compute per-sheet cosine-similarity matrices for an Excel workbook.

    Each sheet of the input workbook is read with its first column as the
    row index; every row is treated as one vector.  For each sheet a square
    matrix of pairwise cosine similarities between rows is written to a
    sheet of the same name in the output workbook.
    """

    def __init__(self, excel_file_path, output_excel_path):
        """Store the input and output workbook paths.

        Args:
            excel_file_path: Path of the workbook to read.
            output_excel_path: Path of the workbook to write.
        """
        self.excel_file_path = excel_file_path
        self.output_excel_path = output_excel_path

    def read_excel_to_vectors(self, sheet_name):
        """Read one sheet and return its rows as vectors.

        Args:
            sheet_name: Name of the sheet to read from the input workbook.

        Returns:
            A tuple ``(vectors, row_labels, col_labels)`` where ``vectors``
            is a list with one NumPy array per row, ``row_labels`` is the
            list of index values (first column of the sheet) and
            ``col_labels`` is the list of column headers.
        """
        df = pd.read_excel(self.excel_file_path, sheet_name=sheet_name, index_col=0)
        # Cells that cannot be parsed as numbers become NaN; any similarity
        # involving such a row will therefore be NaN as well.
        df = df.apply(pd.to_numeric, errors='coerce')

        vectors = list(df.to_numpy())
        row_labels = df.index.tolist()
        col_labels = df.columns.tolist()
        return vectors, row_labels, col_labels

    def cosine_similarity_matrix(self, vectors):
        """Return the pairwise cosine-similarity matrix of ``vectors``.

        Args:
            vectors: Sequence of equal-length 1-D numeric arrays.

        Returns:
            A ``(len(vectors), len(vectors))`` float array.  The diagonal is
            left at 0 (self-similarity is deliberately not reported) and the
            matrix is symmetric.
        """
        num_vectors = len(vectors)
        # np.zeros already provides the zero diagonal.
        similarity_matrix = np.zeros((num_vectors, num_vectors))

        # Cosine similarity is symmetric, so compute each unordered pair
        # once and mirror it instead of evaluating both (i, j) and (j, i).
        for i in range(num_vectors):
            for j in range(i + 1, num_vectors):
                similarity = self.cosine_similarity(vectors[i], vectors[j])
                similarity_matrix[i, j] = similarity
                similarity_matrix[j, i] = similarity

        return similarity_matrix

    def cosine_similarity(self, vector1, vector2):
        """Return the cosine similarity of two vectors.

        Args:
            vector1: First 1-D numeric array.
            vector2: Second 1-D numeric array of the same length.

        Returns:
            ``dot(vector1, vector2) / (‖vector1‖ · ‖vector2‖)``, or ``0.0``
            when either vector has zero norm (the ratio is undefined there
            and would otherwise evaluate to NaN with a runtime warning).
            NaN inputs still propagate to a NaN result.
        """
        norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if norm_product == 0:
            return 0.0
        return np.dot(vector1, vector2) / norm_product

    def generate_similarity_matrices(self):
        """Write one similarity matrix per input sheet to the output workbook.

        Every sheet of the input workbook is processed in order; the
        resulting matrix is labelled with the sheet's row labels on both
        axes and saved under the same sheet name.  A confirmation message
        is printed once the output workbook has been written.
        """
        # Both workbooks are managed by the ``with`` statement so the writer
        # is closed even if reading or writing a sheet raises.
        with pd.ExcelFile(self.excel_file_path) as xls, \
                pd.ExcelWriter(self.output_excel_path, engine='xlsxwriter') as writer:
            for sheet_name in xls.sheet_names:
                vectors, row_labels, _ = self.read_excel_to_vectors(sheet_name)
                similarity_matrix = self.cosine_similarity_matrix(vectors)

                similarity_df = pd.DataFrame(similarity_matrix, index=row_labels, columns=row_labels)
                similarity_df.to_excel(writer, sheet_name=sheet_name, index=True)

        print(f"余弦相似度矩阵已保存到 {self.output_excel_path}")

# Example usage.
# Path of the input Excel workbook; replace with your own file.
excel_file_path = r'E:\课题组\个人\论文\多系统风险传导最短路径\network\全年network\media_event_allyear.xlsx'
# Path where the new Excel workbook is saved; replace with your own destination.
output_excel_path = r'E:\课题组\个人\论文\多系统风险传导最短路径\network\全年network\03media_allyear1.xlsx'

calculator = MediaMatrix(
    excel_file_path=excel_file_path,
    output_excel_path=output_excel_path,
)
calculator.generate_similarity_matrices()
