## Generating TextRank Scores

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

# Read the scores CSV into a DataFrame; the path is named so the source of
# `df` is easy to spot.
scores_csv_path = '/content/drive/MyDrive/BugSum-master/excel sheet/scores.csv'
df = pd.read_csv(scores_csv_path)

def string_to_array(s):
    """Parse a stringified 1-D numeric array back into a float numpy array.

    Accepts both the whitespace-separated form produced by ``str(ndarray)``
    (``"[0.1 0.2 0.3]"``, possibly wrapped over several lines) and the
    comma-separated form produced by ``str(list)`` (``"[0.1, 0.2, 0.3]"``).
    Leading/trailing whitespace around the brackets is ignored.

    Args:
        s: String representation of a flat sequence of numbers.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype float; empty if ``s`` holds no values.

    Raises:
        ValueError: If any token is not a valid float literal (for example the
            ``...`` that numpy inserts when it abbreviates a long array).
    """
    # Strip outer whitespace first so a trailing newline cannot shield the
    # closing bracket from strip('[]'); then treat commas as plain separators.
    tokens = s.strip().strip('[]').replace(',', ' ').split()
    return np.array([float(tok) for tok in tokens], dtype=float)

# Keep the embedding strings exactly as they were loaded. They are written back
# to the CSV unchanged at the end of this cell: serialising the parsed numpy
# arrays instead would go through numpy's str(), which rounds to 8 significant
# digits and abbreviates arrays longer than 1000 elements with "...", so every
# run that overwrites scores.csv would silently degrade the stored embeddings.
raw_embedding_strings = df['Sentence_Embeddings'].copy()

# Parse the embeddings column into numpy arrays (the in-memory df keeps arrays)
df['Sentence_Embeddings'] = raw_embedding_strings.apply(string_to_array)

# Stack all sentence embeddings into a (n_sentences, embedding_dim) matrix
embeddings_matrix = np.stack(df['Sentence_Embeddings'].values)

# Pairwise cosine similarity between every pair of sentences
similarity_matrix = cosine_similarity(embeddings_matrix)

# Build a weighted graph whose nodes are row positions 0..n-1.
# NOTE(review): the diagonal (self-similarity) becomes self-loops and any
# negative similarities become negative edge weights; both are passed to
# PageRank as-is. Also, all rows are ranked in one graph rather than per
# BugNum -- confirm that this is the intended TextRank setup.
G = nx.from_numpy_array(similarity_matrix)

# Use PageRank to calculate scores (dict: node position -> score)
pagerank_scores = nx.pagerank(G, alpha=0.75)

# Graph nodes are row positions, not index labels, so assign by position
# instead of looking scores up through df.index.
df['TextRank_Score'] = [pagerank_scores[node] for node in range(len(df))]

# Sort the DataFrame based on the TextRank scores to see the top-ranked sentences
df_sorted = df.sort_values(by='TextRank_Score', ascending=False)

# Save the updated dataframe, restoring the original embedding strings so the
# file round-trips losslessly (.to_numpy() avoids any index alignment).
df.assign(Sentence_Embeddings=raw_embedding_strings.to_numpy()).to_csv(
    '/content/drive/MyDrive/BugSum-master/excel sheet/scores.csv', index=False
)

# Display the top-ranked sentences
print(df_sorted[['BugNum', 'SenNumber', 'Sentence', 'TextRank_Score']].head())



      BugNum  SenNumber                                           Sentence  \
3474      83        1.7  this means that if people have gimp-2.0 instal...   
129        3        8.3  after moving to wtp 2.0, the project will no l...   
2841      56       15.7  note: saving as draft an email with no recipie...   
1844      38        7.3  > it might be worth adding another flag that i...   
1832      38        5.2  it might be worth adding another flag that is ...   

      TextRank_Score  
3474        0.000275  
129         0.000275  
2841        0.000275  
1844        0.000275  
1832        0.000275  
