In [8]:
import pandas as pd

# Read the ticket export and keep only the rows that actually have a
# description (rows whose "Description" is missing are discarded).
df = pd.read_csv("TDX_data_UTF8_small.csv").dropna(subset=["Description"])

In [9]:
import sklearn
from transformers import BertTokenizer, BertModel
import torch

In [10]:
# Tokenizer and encoder are loaded from the same pretrained checkpoint so
# that the vocabulary matches the model weights.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

def _embed_text(text, max_length):
    """Return a mean-pooled BERT embedding of ``text`` as a 1-D NumPy array.

    Uses the module-level ``tokenizer`` and ``model``. The input is truncated
    to ``max_length`` tokens but is NOT padded: padding a single sequence to a
    fixed length and then averaging over every position would mix pad-token
    states into the embedding. The attention mask is still applied when
    averaging so that any padded positions are excluded.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)
    with torch.no_grad():
        output = model(**inputs)
    hidden = output.last_hidden_state
    # Broadcast the mask over the hidden dimension and average only the
    # positions the mask marks as real tokens.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
    return pooled.squeeze(0).numpy()


def extractive_summary(description):
    """Pick the sentence of ``description`` that best represents the whole text.

    The description is split on '.', each non-empty fragment is embedded with
    BERT, and the fragment whose embedding has the highest cosine similarity
    to the embedding of the full description is returned.

    Parameters
    ----------
    description : object
        The text to summarise. Anything that is not a ``str`` (e.g. ``None``
        or a float NaN) is treated as missing.

    Returns
    -------
    str or None
        ``None`` for non-string input; otherwise the selected sentence with
        surrounding whitespace removed and without its trailing period. If
        the text contains no non-empty fragment, the stripped input is
        returned unchanged.

    Notes
    -----
    Splitting on '.' is a rough heuristic: abbreviations, decimals and
    dotted identifiers are split as well.
    """
    # Check if description is not a string or NaN
    if not isinstance(description, str):
        return None

    # Split into candidate sentences, dropping blank fragments (there is
    # always one after a trailing period) so an empty string can never be
    # selected as the summary.
    sentences = [part.strip() for part in description.split('.')]
    sentences = [sent for sent in sentences if sent]

    # Nothing to rank: avoid running the model at all.
    if not sentences:
        return description.strip()
    if len(sentences) == 1:
        return sentences[0]

    # Embed each candidate and the full description
    embeddings = [_embed_text(sent, max_length=128) for sent in sentences]
    desc_embedding = _embed_text(description, max_length=512)

    # Find the most similar sentence
    from sklearn.metrics.pairwise import cosine_similarity
    similarities = cosine_similarity([desc_embedding], embeddings)
    most_similar_idx = int(similarities.argmax())

    return sentences[most_similar_idx]

# Summarise every description, store the result alongside it, and show the frame
summaries = df['Description'].apply(extractive_summary)
df['Summary'] = summaries
print(df)


        ID                                     Title  \
1   114115                    IT Applicant Questions   
2   114113                            Display issues   
3   114112                  Loaner Laptop Won't Boot   
4   114111                       Esports room access   
5   114109                      ethernet not working   
..     ...                                       ...   
60  114029                   Request for DUO Support   
61  114028                          Remove RA Access   
62  114027        Setting up room with TVs for event   
63  114026   Config a mstp priority to core routers.   
64  114025  Keep Learning Student Technology Request   

                                          Description  \
1   Client came up to me at the desk asking genera...   
2   Client visited the desk saying that the displa...   
3   Client recently loaned out a laptop, but has b...   
4   Client needs access to the e-sports room becau...   
5   Ari lost internet connection through h

In [16]:
# Write the first ten summaries to a text file, one per line.
# Rows are taken by position (head) rather than looked up by index label:
# rows without a Description were dropped when the data was loaded, so the
# labels 0..9 are not guaranteed to exist and a label lookup would write
# "None" for every gap while skipping real rows.
with open('output.txt', 'w', encoding='utf-8') as f:
  for summary in df["Summary"].head(10):
    f.write(str(summary) + "\n")