In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import spacy
import os
import torch
import numpy as np

# Small English spaCy pipeline; get_sentences() uses it for sentence segmentation.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Load the texts to analyse. The preview below shows the columns
# `level`, `topic` and `text` (plus the unnamed index column saved with the CSV).
texts = pd.read_csv("../dat/texts.csv")
texts.head()

Unnamed: 0,level,topic,text
0,A1,friends,Friends are people who you like and enjoy spen...
1,A2,friends,Friends are people who we like and care about....
2,B1,friends,Friends are people who we have a close relatio...
3,B2,friends,Friends are people who are close to us and who...
4,C1,friends,Friends are a vital part of our lives. They pr...


In [3]:
def get_sentences(text):
    """Split ``text`` into sentences using the module-level spaCy pipeline.

    Args:
        text: The string to segment; it is passed to ``nlp`` unchanged.

    Returns:
        A list of strings, one per sentence span reported by spaCy, each
        with leading and trailing whitespace stripped.
    """
    stripped_sentences = []
    # doc.sents yields sentence spans (not tokens); keep only their text.
    for sentence_span in nlp(text).sents:
        stripped_sentences.append(sentence_span.text.strip())
    return stripped_sentences

# Segment every text into a list of sentences (one list per row),
# then preview the sentences of the row labelled 0.
texts["sentences"] = texts.text.apply(get_sentences)
texts["sentences"][0]

['Friends are people who you like and enjoy spending time with.',
 'They are there for you when you need them and make you laugh.',
 'You can talk to them about anything and they will always be there to listen.',
 'Friends are important because they make life more fun and they help you to grow as a person.']

In [4]:
# One row per sentence from here on. explode() repeats the original index label
# for every sentence of the same text, so index labels are no longer unique.
# NOTE(review): a row whose sentence list is empty would become NaN here and
# flow into the encoder below - confirm no text yields zero sentences.
texts = texts.explode("sentences")

In [5]:
# Embed every sentence with the 'llmrails/ember-v1' sentence-transformers model.
# Row i of `embeddings` corresponds to the i-th row (by position) of `texts`.
embeddings_model = SentenceTransformer('llmrails/ember-v1')
embeddings = embeddings_model.encode(list(texts["sentences"]))

In [6]:
# Sanity check: (number of sentences, embedding dimension).
embeddings.shape

(89, 1024)

In [7]:
class FeedforwardNN(torch.nn.Module):
    """One-hidden-layer classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Maps each ``input_dim``-sized vector to a single score between 0 and 1.

    Args:
        input_dim: Size of each input vector.
        hidden_dim: Number of units in the hidden layer.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        # Attribute names and registration order define the state_dict keys
        # (fc1.*, fc2.*) and the layout of pickled instances, so keep them
        # stable for any previously saved models.
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_dim, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores with a trailing dimension of size 1."""
        activations = self.fc1(x)
        activations = self.relu(activations)
        logits = self.fc2(activations)
        return self.sigmoid(logits)

In [8]:
# Lookup table for the saved models: the evaluation loop below matches each
# model's numeric file name against the '#' column and prints the row's
# 'Can-do statement'.
egp = pd.read_csv('../dat/egponline.csv')

In [9]:
# Run every saved detector over the sentence embeddings and print, for each,
# its can-do statement followed by the unique sentences it scores above 0.5.

# Loop-invariant inputs: build them once instead of once per model.
embeddings_tensor = torch.Tensor(embeddings)
sents = np.array(texts["sentences"])

# Keep only files named "<id>.<ext>" with a numeric id (skips e.g. .DS_Store),
# and order them by id because os.listdir() returns entries in arbitrary order.
model_files = sorted(
    (name for name in os.listdir("../models") if os.path.splitext(name)[0].isdigit()),
    key=lambda name: int(os.path.splitext(name)[0]),
)

for model_file in model_files:
    construct_id = int(os.path.splitext(model_file)[0])
    print(str(egp[egp['#']==construct_id].iloc[0]['Can-do statement']))

    # SECURITY: these files hold fully pickled modules, and unpickling can run
    # arbitrary code - only load model files you trust. weights_only=False is
    # spelled out because the default differs between PyTorch versions;
    # map_location keeps the model on the CPU, where the embeddings tensor lives.
    model = torch.load(f"../models/{model_file}", map_location="cpu", weights_only=False)
    model.eval()
    with torch.no_grad():  # inference only: no autograd graph needed
        outputs = model(embeddings_tensor)

    # Flatten the (n_sentences, 1) scores into a 1-D boolean mask over `sents`.
    detected_mask = (outputs > 0.5).reshape(-1).numpy()
    detected_sents = sents[detected_mask]
    print(np.unique(detected_sents))

Can form conditional subordinate clauses with 'if' + past simple, and would in the main clause, to talk about an imagined situation, often in the context of advice or opinion-giving. 
['* Be kind' '* Be positive.' '* Be supportive.' 'They are a gift.'
 'They make us laugh.']
Can use prepositional phrases in the front position for focus.
['* Be kind' '* Be positive.' '* Be supportive.' 'They are a gift.'
 'They make us laugh.']
Can use an increasing range of adjectives before a noun to express intensity.
['* Be kind' '* Be positive.' '* Be supportive.' 'They are a gift.'
 'They make us laugh.' 'Third, be a good friend.']
Can use nouns and noun phrases as complements of the verb 'be'. 
['* Be kind' '* Be supportive.' 'They make us laugh.']
Can use the past perfect continuous in a relative clause to give background information.  ► relative clauses
['* Be kind' '* Be positive.' '* Be supportive.'
 'Some friends are male, while others are female.' 'They make us laugh.']
Can use 'anything' w