# Embeddings Projects Demo

This notebook demonstrates simplified versions of two embeddings-based projects using only standard Python libraries.

In [1]:
from collections import Counter
import math

def embed(text):
    tokens=text.lower().split()
    return Counter(tokens)

def cosine(a,b):
    all_keys=set(a)|set(b)
    dot=sum(a.get(k,0)*b.get(k,0) for k in all_keys)
    na=math.sqrt(sum(v*v for v in a.values()))
    nb=math.sqrt(sum(v*v for v in b.values()))
    return dot/(na*nb) if na and nb else 0.0


## 1. Ask-NAAC-Anything (Toy Example)

In [2]:
docs={
    'SSR_Criterion3.txt': 'Extension activities carry forty percent of marks under Metric 3.3.',
    'Minutes.txt': 'Meeting discussed outreach programs and extension activities weightage.',
    'AQAR.txt': 'Criterion 3 emphasises research and extension work.'
}
embeddings={fn:embed(txt) for fn,txt in docs.items()}
query='What is the weightage for Extension Activities?'
q_vec=embed(query)
results=sorted(((cosine(q_vec,vec),fn) for fn,vec in embeddings.items()), reverse=True)
for score,fn in results:
    print(fn,'->',round(score,2))


AQAR.txt -> 0.14
Minutes.txt -> 0.13
SSR_Criterion3.txt -> 0.12


## 2. Project Abstract Similarity & Diversity Dashboard

In [3]:
abstracts=[
    {'title':'IoT Soil Sensor','text':'A solar-powered sensor measures moisture and nutrients.'},
    {'title':'Crop Yield Predictor','text':'Machine learning predicts farm yield from sensor data.'},
    {'title':'ECG Anomaly Detector','text':'CNN models detect anomalies in ECG signals.'},
    {'title':'Soil Sensor Variant','text':'Solar sensor tracks soil moisture efficiently.'}
]
vecs=[embed(a['text']) for a in abstracts]
# find duplicates
n=len(vecs)
dups=[]
for i in range(n):
    for j in range(i+1,n):
        sim=cosine(vecs[i],vecs[j])
        if sim>0.8:
            dups.append((abstracts[i]['title'], abstracts[j]['title'], round(sim,2)))
print('Duplicates:', dups)
# simple 2-cluster by first token
clusters={}
for a in abstracts:
    key=a['text'].split()[0]
    clusters.setdefault(key,[]).append(a['title'])
print('Clusters:', clusters)


Duplicates: []
Clusters: {'A': ['IoT Soil Sensor'], 'Machine': ['Crop Yield Predictor'], 'CNN': ['ECG Anomaly Detector'], 'Solar': ['Soil Sensor Variant']}
