In [21]:
import ollama
import readability
import language_tool_python
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer, util

In [22]:
# inference using the llama3 model
response = ollama.chat(model='phi3', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
])
print(response['message']['content'])

The sky appears blue to us because of a process called Rayleigh scattering. As sunlight enters Earth's atmosphere, it is made up of different colors that correspond to various wavelengths of light. Shorter wavelengths (blue and violet) are scattered in all directions by the gases and particles in the Earth's atmosphere much more effectively than longer wavelengths (like red). Although both blue and violet light get scattered, our eyes are more sensitive to blue light, which is why we perceive the sky as being blue. Additionally, some of the violet light gets absorbed by the upper atmosphere, leaving predominantly blue light visible during daylight hours.


In [23]:
query = 'Why is the sky blue?'
response = response['message']['content']

In [25]:
# create the embeddings
embeddings = SentenceTransformer('all-MiniLM-L6-v2')
query_embedding = embeddings.encode('Why is the sky blue?')
response_embedding = embeddings.encode(response)

In [26]:
# calculate cosine similarity
similarity = util.cos_sim(query_embedding, response_embedding)
print('Relevance Score:', similarity.item())

Relevance Score: 0.7495800256729126


In [27]:
# check for grammatical errors
tool = language_tool_python.LanguageTool('en-US')
matches = tool.check(response)
print('Number of grammatical errors:', len(matches))

Number of grammatical errors: 0


In [28]:
# check for automated readability index
results = readability.getmeasures(response, lang='en')
print('Automated Readability Index:', results['readability grades']['ARI'])

Automated Readability Index: 55.928130841121494


In [29]:
# evaluate sentiment consistency
query_sentiment = TextBlob(query).sentiment.polarity
response_sentiment = TextBlob(response).sentiment.polarity
print('Sentiment Consistency Score:', abs(query_sentiment - response_sentiment))

Sentiment Consistency Score: 0.19999999999999998


In [30]:
# check concept coverage
vectorizer = TfidfVectorizer(stop_words='english')
tfidf = vectorizer.fit_transform([query, response])
feature_names = vectorizer.get_feature_names_out()
query_concepts = set([feature_names[i] for i in tfidf[0].nonzero()[1]])
response_concepts = set([feature_names[i] for i in tfidf[1].nonzero()[1]])
coverage = query_concepts.intersection(response_concepts)
concepts = response_concepts.difference(query_concepts)
print('Concepts Covered:', coverage)
print('New Concepts Introduced:', concepts)

Concepts Covered: {'blue', 'sky'}
New Concepts Introduced: {'leaving', 'longer', 'appears', 'hours', 'violet', 'directions', 'different', 'red', 'enters', 'sensitive', 'gets', 'correspond', 'absorbed', 'effectively', 'process', 'particles', 'scattered', 'sunlight', 'called', 'daylight', 'additionally', 'upper', 'visible', 'scattering', 'atmosphere', 'eyes', 'earth', 'shorter', 'rayleigh', 'light', 'colors', 'wavelengths', 'gases', 'various', 'predominantly', 'perceive', 'like'}
