In [1]:
import torch
from sentence_transformers import SentenceTransformer
import pandas as pd
import json

## Load dataset

In [2]:
# Read the post-EDA institutions table; the file is semicolon-delimited and
# its first row holds the column names.
df = pd.read_csv(
    "../DataSets/processed/institutions_content_based_data_after_eda.csv",
    header=0,
    sep=";",
)

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,id,institution_name,institution_type,state,district,all_course_descriptions,all_degree_descriptions,all_degree_names,all_stream_names,all_course_names
0,1,Annai Velakanni College For Women,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in English is a versatil...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Biochemistry, Business..."
1,2,Alpha Arts And Science College,private,Tamil Nadu,Chennai,Bachelor of Business Administration (BBA) in B...,"Bachelor of Business Administration, Bachelor ...","B.Com, B.Sc, BBA, BCA","Biotechnology, Commerce, Management, Science","Biotechnology, Business Administration, Comput..."
2,3,Agurchand Manmull Jain College,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in Criminology and Polic...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Banking Management, Bu..."
3,4,Anna Adarsh College For Women,private,Tamil Nadu,Chennai,B.Com (Hons) program designed to provide stude...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Banking Management, Bu..."
4,5,Annai Violet Arts and Science College,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in Economics offers stud...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Biochemistry, Business..."


## Load model for embedding

In [4]:
# Instantiate the sentence-embedding model used to encode each institution's text.
model = SentenceTransformer(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
)

In [5]:
# Columns concatenated (in this order, space-separated) into the single string
# that is embedded for each institution.
text_columns = [
    "institution_name", "institution_type", "state", "district",
    "all_course_descriptions", "all_degree_descriptions", "all_degree_names",
    "all_stream_names", "all_course_names",
]

# Missing values are replaced with "" *before* the string cast: astype(str)
# alone would turn NaN into the literal word "nan", and that token would end
# up inside the text that gets embedded.
# NOTE(review): the long description columns come before the degree/stream/
# course name columns; if the embedding model truncates long inputs, the
# trailing fields may never be seen -- confirm the ordering is intentional.
df["combined_text"] = (
    df[text_columns]
    .fillna("")
    .astype(str)
    .agg(" ".join, axis=1)
)


In [6]:
# Preview the first five rows, now including the combined_text column.
df.head(n=5)

Unnamed: 0,id,institution_name,institution_type,state,district,all_course_descriptions,all_degree_descriptions,all_degree_names,all_stream_names,all_course_names,combined_text
0,1,Annai Velakanni College For Women,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in English is a versatil...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Biochemistry, Business...",Annai Velakanni College For Women private Tami...
1,2,Alpha Arts And Science College,private,Tamil Nadu,Chennai,Bachelor of Business Administration (BBA) in B...,"Bachelor of Business Administration, Bachelor ...","B.Com, B.Sc, BBA, BCA","Biotechnology, Commerce, Management, Science","Biotechnology, Business Administration, Comput...",Alpha Arts And Science College private Tamil N...
2,3,Agurchand Manmull Jain College,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in Criminology and Polic...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Banking Management, Bu...",Agurchand Manmull Jain College private Tamil N...
3,4,Anna Adarsh College For Women,private,Tamil Nadu,Chennai,B.Com (Hons) program designed to provide stude...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Banking Management, Bu...",Anna Adarsh College For Women private Tamil Na...
4,5,Annai Violet Arts and Science College,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in Economics offers stud...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Biochemistry, Business...",Annai Violet Arts and Science College private ...


In [7]:
# Encode all rows with a single batched call instead of invoking the model
# once per row through .apply(); the model processes the list in mini-batches,
# which is considerably faster than row-by-row encoding.
# NOTE(review): batched inputs are padded together, so values may differ from
# the per-row results in the last few floating-point digits.
embedding_matrix = model.encode(
    df["combined_text"].tolist(),
    show_progress_bar=True,
)

# Store one 1-D vector per row; the explicit index keeps the vectors aligned
# with df even if its index is not a plain 0..n-1 range.
df["embeddings"] = pd.Series(list(embedding_matrix), index=df.index)

In [8]:
# Preview the first five rows, now including the embeddings column.
df.head(n=5)

Unnamed: 0,id,institution_name,institution_type,state,district,all_course_descriptions,all_degree_descriptions,all_degree_names,all_stream_names,all_course_names,combined_text,embeddings
0,1,Annai Velakanni College For Women,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in English is a versatil...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Biochemistry, Business...",Annai Velakanni College For Women private Tami...,"[-0.03530252, 0.0064196526, -0.07201576, 0.023..."
1,2,Alpha Arts And Science College,private,Tamil Nadu,Chennai,Bachelor of Business Administration (BBA) in B...,"Bachelor of Business Administration, Bachelor ...","B.Com, B.Sc, BBA, BCA","Biotechnology, Commerce, Management, Science","Biotechnology, Business Administration, Comput...",Alpha Arts And Science College private Tamil N...,"[-0.0340493, -0.010598709, -0.06505797, -0.056..."
2,3,Agurchand Manmull Jain College,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in Criminology and Polic...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Banking Management, Bu...",Agurchand Manmull Jain College private Tamil N...,"[-0.028798798, -0.011772328, -0.123717144, -0...."
3,4,Anna Adarsh College For Women,private,Tamil Nadu,Chennai,B.Com (Hons) program designed to provide stude...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Banking Management, Bu...",Anna Adarsh College For Women private Tamil Na...,"[-0.036633845, -0.008536323, -0.036580175, 0.0..."
4,5,Annai Violet Arts and Science College,private,Tamil Nadu,Chennai,Bachelor of Arts (BA) in Economics offers stud...,"Bachelor of Arts, Bachelor of Business Adminis...","B.A, B.Com, B.Sc, BBA, BCA","Arts, Commerce, Management, Science","Accounting and Finance, Biochemistry, Business...",Annai Violet Arts and Science College private ...,"[-0.019887945, -0.004737399, -0.024683991, 0.0..."


In [9]:
# Serialize each embedding vector to a comma-separated string so it fits in a
# single CSV field (the file itself uses ";" as the column separator).
# Values that are already strings are left untouched: without this guard,
# re-running the cell would join the individual *characters* of the serialized
# string with commas and silently corrupt the column.
df["embeddings"] = df["embeddings"].apply(
    lambda vec: vec if isinstance(vec, str) else ",".join(map(str, vec))
)

In [10]:
# Persist the frame (including the serialized embeddings) as a
# semicolon-delimited CSV without writing the row index.
df.to_csv(
    "../DataSets/processed/institutions_content_based_data_after_eda_after_embedding.csv",
    sep=";",
    index=False,
)