In [22]:
import numpy as np
import pandas as pd
import hashlib
import os
import re
import xml.etree.ElementTree as ET

In [103]:
pd.set_option('display.max_colwidth', 1000)
#pd.set_option('display.width', 1000)

In [8]:
import xml.etree.ElementTree as ET
import pandas as pd


class GradeXML2DataFrame:

    def __init__(self, xml_data):
        xtree = ET.parse(xml_data)
        self.root = xtree.getroot()
        
    def extract_label(self, label_string):
        """Function that returns the class Id from original label string.
        0: correct
        1: correct_but_incomplete
        2: contradictory
        3: incorrect

        Args:
            label_string (String): original label in XML file

        Returns:
            class_id (Int): target class Id (0, 1, 2 or 3)
        """
        
        # Create list of all labels
        label_list = np.array(label_string.split('|'))
        # Keep only the label with flag (1)
        class_id = [i for i, s in enumerate(label_list) if "(1)" in s][0]
        return class_id
    
    
    def parse_instance(self, instance):
        """Reutrn a dictionary of instance attributes and their values."""
        
        info = {}
        info['instance_id'] = int(instance.attrib['ID'])
        for elem in instance:
            if elem.tag == 'MetaInfo':
                info['student_id'] = elem.attrib['StudentID']
                info['task_id'] = elem.attrib['TaskID']
            if elem.tag == 'Annotation':
                info['label'] = self.extract_label(elem.attrib['Label'])
            if elem.tag == 'ProblemDescription':
                info['problem_description'] = elem.text
            if elem.tag == 'Question':
                info['question'] = elem.text
            if elem.tag == 'Answer':
                info['answer'] = elem.text
            if elem.tag == 'ReferenceAnswers':
                info['reference_answers'] = re.sub('\n', ' ', elem.text.strip())
        return info
    
    
    def process_data(self):
        """Return a pandas dataframe containing all instances and their data."""
        instances = []
        for instance in self.root:
            info = self.parse_instance(instance)   
            instances.append(info) 
        columns = ["instance_id", "student_id", "task_id", "problem_description",
                   "question", "answer", "reference_answers", "label"]
        return pd.DataFrame(instances, columns=columns)
    

xml2df = GradeXML2DataFrame(os.path.join('data', 'grade_data.xml'))
data = xml2df.process_data()

In [9]:
data

Unnamed: 0,instance_id,student_id,task_id,problem_description,question,answer,reference_answers,label
0,1,DTSU040,LP03_PR09.bLK.sh,"A car windshield collides with a mosquito, squ...",How does Newton's third law apply to this situ...,the windshield will apply a force to the mosqu...,1: Since the windshield exerts a force on the...,1
1,2,DTSU035,FM_LV04_PR05.sh,Two hockey players pass a puck between them on...,What forces are acting on the puck while the p...,The normal force coming from the ice and the g...,1: The forces acting on the puck while it is ...,0
2,3,DTSU021,FM_LVxx_PR01,A rocket pushes a meteor with constant force. ...,Can you articulate Newton's second law?,"if there is a zero net force on the object, th...",1: Newton's 2nd Law says that the net force i...,3
3,4,DTSU033,LP03_PR09.bLK.sh,"A car windshield collides with a mosquito, squ...",Can you articulate a principle or definition w...,An equal force always balancing it out regardl...,"1: For every action, there is an equal and op...",3
4,5,DTSU015,FM_LV04_PR05,Two hockey players pass a puck between them on...,"Based on Newton's first law, what can you say ...",The speed of the puck will equal to the net fo...,1: The puck will move in a straight line with...,3
5,6,DTSU017,FM_LVxx_PR02.sh,A rocket is pushing a meteor with constant for...,How will the meteor move after the rocket stop...,The meteor will continue in a straight line wi...,"1: When the rocket stops pushing, no forces a...",0
6,7,DTSU028,LP03_PR09.bLK.sh,"A car windshield collides with a mosquito, squ...",How do the amounts of the force exerted on the...,The forces excerted by each are equal because ...,1: Since the windshield exerts a force on the...,0
7,8,DTSU020,FM_LVxx_PR01,A rocket pushes a meteor with constant force. ...,Given that the meteor starts at rest and then ...,it will reach a max velocity,1: The meteor will move with increasing veloc...,3
8,9,DTSU012,FF_LV02_PR02.sh,A basketball player is dribbling a basketball ...,Because the ball's velocity is upward while th...,The ball's velocity is decreasing.,1: The ball is slowing down at a constant rat...,0
9,10,DTSU017,Demo_LP01_PR01.sh,A mover pushes a box so that it slides across ...,What forces are acting on the box?,The forces acting on the box is the horizontal...,1: The forces are the force of the mover push...,1


In [10]:

from sklearn.model_selection import train_test_split
modeling, test = train_test_split(data, test_size=0.2, random_state=22)


if not os.path.exists("munge"):
    os.makedirs("munge")

modeling.to_csv(os.path.join("munge", "modeling.csv"), index=False)
test.to_csv(os.path.join("munge", "test.csv"), index=False)

In [12]:
train = pd.read_csv(os.path.join("munge", "train.csv"))

In [13]:
len(train)

538

In [14]:
train

Unnamed: 0,instance_id,student_id,task_id,problem_description,question,answer,reference_answers,label
0,525,DTSU039,FM_LV04_PR10.FCI-16.sh,"To rescue a child who has fallen down a well, ...",Can you articulate Newton's first law?,an object at rest or in motion will remain at ...,1: An object at rest will stay at rest and at...,1
1,363,DTSU033,FM_LVxx_PR02.sh,A rocket is pushing a meteor with constant for...,How will the meteor move after the rocket stop...,The meteor will continue to move at a constant...,"1: When the rocket stops pushing, no forces a...",1
2,560,DTSU019,FM_LV03_PR07,A mover pushes a desk with a horizontal force ...,What can you say about the acceleration of the...,The acceleration and speed of the desk both in...,1: The desk will move with constant accelerat...,2
3,471,DTSU032,FF_LV02_PR02,A basketball player is dribbling a basketball ...,Because the ball's velocity is upward while th...,THE VELOCITY IS DECREASING,1: The ball is slowing down at a constant rat...,0
4,27,DTSU019,FM_LV03_PR07,A mover pushes a desk with a horizontal force ...,What can you say about the speed of the desk?,"When the force on the desk is doubles, the des...",1: The desk moves with increasing velocity. 2...,3
5,355,DTSU018,FM_LV04_PR10.FCI-16,"To rescue a child who has fallen down a well, ...",What can you say about the net force on the kid?,the kid is moving at a constant velocity in a ...,1: Since the child is being raised straight u...,1
6,232,DTSU019,FF_LV02_PR02,A basketball player is dribbling a basketball ...,What forces are acting on the ball when it is ...,The acceleration at this point is at its highe...,1: Gravity is the only force acting on the ba...,3
7,446,DTSU038,FM_LV04_PR10.FCI-16,"To rescue a child who has fallen down a well, ...",How does the amount of tension in the rope com...,The tension force minus the weight of the chil...,1: Since the child is being raised straight u...,3
8,144,DTSU041,Demo_LP01_PR01.sh,A mover pushes a box so that it slides across ...,What forces are acting on the box? Which force...,MOVER APPLIES GREADER FORCE TO THE OBJECT THAN...,1: The forces are the force of the mover push...,3
9,113,DTSU023,FM_LV03_PR07.sh,A mover pushes a desk with a horizontal force ...,Because the net force on the desk was zero bef...,the magnitude of the mover's push is greater t...,"1: Because the net force on the desk is zero,...",2


In [47]:
def read_train():
    return pd.read_csv(os.path.join("munge", "train.csv"))

In [48]:
train = read_train()

In [49]:
train.head()

Unnamed: 0,instance_id,student_id,task_id,problem_description,question,answer,reference_answers,label
0,525,DTSU039,FM_LV04_PR10.FCI-16.sh,"To rescue a child who has fallen down a well, ...",Can you articulate Newton's first law?,an object at rest or in motion will remain at ...,1: An object at rest will stay at rest and at...,1
1,363,DTSU033,FM_LVxx_PR02.sh,A rocket is pushing a meteor with constant for...,How will the meteor move after the rocket stop...,The meteor will continue to move at a constant...,"1: When the rocket stops pushing, no forces a...",1
2,560,DTSU019,FM_LV03_PR07,A mover pushes a desk with a horizontal force ...,What can you say about the acceleration of the...,The acceleration and speed of the desk both in...,1: The desk will move with constant accelerat...,2
3,471,DTSU032,FF_LV02_PR02,A basketball player is dribbling a basketball ...,Because the ball's velocity is upward while th...,THE VELOCITY IS DECREASING,1: The ball is slowing down at a constant rat...,0
4,27,DTSU019,FM_LV03_PR07,A mover pushes a desk with a horizontal force ...,What can you say about the speed of the desk?,"When the force on the desk is doubles, the des...",1: The desk moves with increasing velocity. 2...,3


In [50]:
def get_hash(string):
    return hashlib.md5(string.encode()).hexdigest()[:10]

In [51]:
train['pd_hash'] = train['problem_description'].apply(get_hash)

In [52]:
train['qu_hash'] = train['question'].apply(get_hash)

In [54]:
def extract_reference_answers(reference_answers):
    """Functions that returns a cleaned list of reference answers.

    Args:
        reference_answers (String): Original reference answers in XML file

    Returns:
        ra_list (list): reference answers separated in a list
    """

    # Split on new line character
    ra_list = reference_answers.split(':') 
    # Delete unecessary answer numbers
    ra_list = [re.sub('[0-9]+', '', ra).strip() for ra in ra_list] 
    # Remove empty elements
    ra_list = [ra for ra in ra_list if ra != '']
    return ra_list

train['ra_list'] = train['reference_answers'].apply(extract_reference_answers)

In [55]:
train

Unnamed: 0,instance_id,student_id,task_id,problem_description,question,answer,reference_answers,label,pd_hash,qu_hash,ra_list
0,525,DTSU039,FM_LV04_PR10.FCI-16.sh,"To rescue a child who has fallen down a well, ...",Can you articulate Newton's first law?,an object at rest or in motion will remain at ...,1: An object at rest will stay at rest and at...,1,5059c9cfbb,27570f5d10,[An object at rest will stay at rest and at ob...
1,363,DTSU033,FM_LVxx_PR02.sh,A rocket is pushing a meteor with constant for...,How will the meteor move after the rocket stop...,The meteor will continue to move at a constant...,"1: When the rocket stops pushing, no forces a...",1,abb3be1545,c9b7df0ef8,"[When the rocket stops pushing, no forces are ..."
2,560,DTSU019,FM_LV03_PR07,A mover pushes a desk with a horizontal force ...,What can you say about the acceleration of the...,The acceleration and speed of the desk both in...,1: The desk will move with constant accelerat...,2,d0331a86c9,37b9d25827,[The desk will move with constant acceleration...
3,471,DTSU032,FF_LV02_PR02,A basketball player is dribbling a basketball ...,Because the ball's velocity is upward while th...,THE VELOCITY IS DECREASING,1: The ball is slowing down at a constant rat...,0,a5b241c8bc,52e3695922,"[The ball is slowing down at a constant rate.,..."
4,27,DTSU019,FM_LV03_PR07,A mover pushes a desk with a horizontal force ...,What can you say about the speed of the desk?,"When the force on the desk is doubles, the des...",1: The desk moves with increasing velocity. 2...,3,d0331a86c9,64a1152f6d,"[The desk moves with increasing velocity., The..."
5,355,DTSU018,FM_LV04_PR10.FCI-16,"To rescue a child who has fallen down a well, ...",What can you say about the net force on the kid?,the kid is moving at a constant velocity in a ...,1: Since the child is being raised straight u...,1,5059c9cfbb,3ca349cdde,[Since the child is being raised straight upwa...
6,232,DTSU019,FF_LV02_PR02,A basketball player is dribbling a basketball ...,What forces are acting on the ball when it is ...,The acceleration at this point is at its highe...,1: Gravity is the only force acting on the ba...,3,a5b241c8bc,e712984405,[Gravity is the only force acting on the ball....
7,446,DTSU038,FM_LV04_PR10.FCI-16,"To rescue a child who has fallen down a well, ...",How does the amount of tension in the rope com...,The tension force minus the weight of the chil...,1: Since the child is being raised straight u...,3,5059c9cfbb,71fc865756,[Since the child is being raised straight upwa...
8,144,DTSU041,Demo_LP01_PR01.sh,A mover pushes a box so that it slides across ...,What forces are acting on the box? Which force...,MOVER APPLIES GREADER FORCE TO THE OBJECT THAN...,1: The forces are the force of the mover push...,3,b595153926,35b53f9861,[The forces are the force of the mover pushing...
9,113,DTSU023,FM_LV03_PR07.sh,A mover pushes a desk with a horizontal force ...,Because the net force on the desk was zero bef...,the magnitude of the mover's push is greater t...,"1: Because the net force on the desk is zero,...",2,d0331a86c9,439b223413,"[Because the net force on the desk is zero, th..."


In [92]:
landmarks_ra = train[['pd_hash', 'qu_hash', 'label', 'ra_list']].explode('ra_list').rename(columns={'ra_list':'answer'})
landmarks_ra['label'] = 0 # these are possible correct answers (class id 0)
landmarks_ra = landmarks_ra.drop_duplicates()
print("Found {} distinct reference answer landmarks.".format(len(landmarks_ra)))
landmarks_a = train[['pd_hash', 'qu_hash', 'label', 'answer']].drop_duplicates()
print("Found {} distinct student answer landmarks.".format(len(landmarks_a)))
landmarks = landmarks_ra.append(landmarks_a).drop_duplicates()
print("Found {} distinct landmarks in total.".format(len(landmarks)))

Found 224 distinct reference answer landmarks.
Found 537 distinct student answer landmarks.
Found 760 distinct landmarks in total.


In [104]:
train[(train['pd_hash']=='a5b241c8bc') & (train['qu_hash']=='e712984405')]

Unnamed: 0,instance_id,student_id,task_id,problem_description,question,answer,reference_answers,label,pd_hash,qu_hash,ra_list
6,232,DTSU019,FF_LV02_PR02,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"The acceleration at this point is at its highest point, and the forces acting on it are gravity and the normal forces from the floor that is pushing the ball back up to the player's hand.","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",3,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
36,145,DTSU018,FF_LV02_PR02,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"When the ball is moving upward, it has a normal force, the force of gravity and it continues upwards to hit its peak to gain a velocity of zero","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",3,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
70,535,DTSU002,FF_LV02_PR02.sh,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,There is the force of gravity and the force applied from the player's hand. The motion of the ball is upward.,"1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",2,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
84,262,DTSU021,FF_LV02_PR02,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"the basketball is in free fall, therefore the only force acting on it is the gravitational force","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",1,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
124,62,DTSU032,FF_LV02_PR02,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,THE FORCE OF GRAVITY IS ACTING ON IT,"1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",1,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
130,573,DTSU007,FF_LV02_PR02.sh,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"When the ball is midway between the player's hand and the ground, the gravitational force from the earth is acting on the ball, as well as the force from the bounce.","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",2,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
135,330,DTSU026,FF_LV02_PR02.sh,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"a downward force from the hand, a gravitational force and a normal force from the ground","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",3,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
153,234,DTSU016,FF_LV02_PR02.sh,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"When it is moving upward, and in the air. The only force acting on the ball is gravity.","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",1,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
208,606,DTSU034,FF_LV02_PR02,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,"when the ball is moving upwards, there is the force of gravity and the force that was applied when the ball hit the ground.","1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",3,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."
242,406,DTSU014,FF_LV02_PR02.sh,A basketball player is dribbling a basketball (continuously bouncing the ball off the ground).,What forces are acting on the ball when it is moving upward and it is midway between the player's hand and the ground? Describe the motion of the ball at that moment.,the forces of gravity and the push from the floor are acting upon the ball. The motion of the ball is upwards and towards the player's hand.,"1: Gravity is the only force acting on the ball. The ball is slowing down at a constant rate. 2: Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down. 3: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down. 4: Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing. 5: Gravity is the only force acting on the ball. The ball's velocity is decreasing. 6: Gravity is the only force acting on the ball. The ball is slowing down. 7: The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate. 8: The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing ...",3,a5b241c8bc,e712984405,"[Gravity is the only force acting on the ball. The ball is slowing down at a constant rate., Gravity is the only force acting on the ball. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball is slowing down., Gravity is the only force acting on the ball. Since the ball's acceleration is in the opposite direction of it's velocity, the ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball's velocity is decreasing., Gravity is the only force acting on the ball. The ball is slowing down., The only force acting on the ball is the downward force of gravity. The ball is slowing down at a constant rate., The only force acting on the ball is the downward force of gravity. Since the ball's velocity is upward and its acceleration is downward, the ball is slowing down., The only force ac..."


In [111]:
landmarks.head()

Unnamed: 0,pd_hash,qu_hash,label,answer
0,5059c9cfbb,27570f5d10,0,An object at rest will stay at rest and at object moving with constant velocity in a straight line will continue moving with constant velocity in a straight line as long as the net force acting on the object is zero.
0,5059c9cfbb,27570f5d10,0,If all the forces acting on an object add up to zero (accounting for the direction of the forces and not just their magnitude) then the acceleration of that object also has to be zero. It can move at a constant speed but it cannot be accelerating.
0,5059c9cfbb,27570f5d10,0,"When the object is in equilibrium or velocity is constant, the sum of all forces will equal ."
1,abb3be1545,c9b7df0ef8,0,"When the rocket stops pushing, no forces are acting on the meteor anymore and therefore will move with constant velocity in a straight line."
1,abb3be1545,c9b7df0ef8,0,"When the rocket stops pushing, the meteor will move with constant velocity in a straight line."


In [114]:
from gensim.models import KeyedVectors

print('Indexing word vectors..')

embedding_file = os.path.join("munge", "GoogleNews-vectors-negative300.bin")
word2vec = KeyedVectors.load_word2vec_format(embedding_file, binary=True)

print('Found %s word vectors of word2vec' % len(word2vec.vocab))

Indexing word vectors..
Found 3000000 word vectors of word2vec


In [115]:
word2vec.doesnt_match("second 2nd third".split())

  vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)


'2nd'

In [194]:
word2vec.most_similar("decreasing")

KeyboardInterrupt: 

In [116]:
word2vec.most_similar("2nd")

[('3rd', 0.9359426498413086),
 ('4th', 0.9051286578178406),
 ('1st', 0.9043662548065186),
 ('5th', 0.8837502002716064),
 ('6th', 0.8750925064086914),
 ('7th', 0.8689243793487549),
 ('8th', 0.8483569622039795),
 ('9th', 0.8367735147476196),
 ('3_rd', 0.7579688429832458),
 ('##st', 0.7001097202301025)]

In [156]:
# Enrich the punctuation
import string
punctuation = string.punctuation
punctuation = punctuation + "’" + "“" + "?" + "‘"
print(punctuation)

def removePunctuation(phrase):
    """
    Removes punctuation, changes to lower case, strips leading, middle and trailing spaces.

    Note:
        Only spaces, letters, and numbers are retained.  Other characters are eliminated (e.g. it's becomes its).
        Leading, middle and trailing spaces are removed after punctuation is removed.

    Args:
        phrase (String): A String containing a sentence.

    Returns:
        nopun: A String with clean-up operation applied.
    """
    
    nopunc = ''.join([c if c not in punctuation else ' ' for c in phrase])
    nopunc = re.sub(r'\s+', ' ', nopunc)
    return nopunc.lower()


# Stemming, functions
import re

def tokenize(phrase):
    """
    Produces a list of tokens from a character string

    Args:
        phrase (String): A character string.

    Returns:
        tokenized: A List of tokens.
    """ 
    
    tokenized = phrase.split()
    
    return tokenized


def filterVocab(tokens):
    tokens = [token for token in tokens if token in word2vec.vocab]
    return tokens


def preprocess(phrase):
    
    return filterVocab(tokenize(removePunctuation(phrase)))

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~’“?‘


In [131]:
phrase1 = preprocess("2nd law")
phrase2 = preprocess("second law")
word2vec.n_similarity(phrase1, phrase2)

0.82781506

In [132]:
phrase1 = preprocess("2nd law")
phrase2 = preprocess("third law")
word2vec.n_similarity(phrase1, phrase2)

0.8073168

In [133]:
phrase1 = preprocess("2nd law")
phrase2 = preprocess("3rd law")
word2vec.n_similarity(phrase1, phrase2)

0.9695801

In [142]:
phrase1 = preprocess("2nd law")
phrase2 = preprocess("Newton's 3rd law applies as a non-zero net force acts on the desk")
word2vec.n_similarity(phrase1, phrase2)

0.51169366

In [171]:
def get_embeddings(tokens):
    
    emb = np.zeros(300)
    if len(tokens) > 0:
        nb_tokens = 0
        for token in tokens:
            emb += word2vec.word_vec(token)
            nb_tokens += 1
        emb /= nb_tokens
    return np.array(emb)

get_embeddings(preprocess("2nd law"))

array([ 0.07080078,  0.02709961,  0.0925293 ,  0.12402344, -0.03125   ,
       -0.09716797, -0.15527344,  0.00256348,  0.05187988,  0.02008057,
       -0.00878906, -0.05255127, -0.16296387,  0.11401367, -0.23657227,
        0.03515625,  0.11669922, -0.05908203, -0.14794922, -0.12719727,
        0.09008789, -0.05316162,  0.03417969,  0.00268555,  0.00823975,
       -0.20117188,  0.03417587, -0.06323242, -0.0322876 ,  0.25292969,
       -0.08447266, -0.23583984,  0.00073242, -0.06878662,  0.02758789,
       -0.00549316,  0.06286621,  0.18359375, -0.04824829,  0.09750366,
       -0.03515625, -0.03369141,  0.01000977,  0.11010742, -0.01611328,
        0.13696289,  0.03872681, -0.03405762, -0.08227539,  0.02685547,
        0.07202148, -0.02441406,  0.15454102,  0.21679688, -0.03469849,
        0.13208008,  0.00598145, -0.00634766, -0.16455078, -0.1595459 ,
       -0.0300293 , -0.18066406, -0.00170898, -0.21801758, -0.203125  ,
        0.11395264,  0.15429688,  0.02093506,  0.03515625,  0.17

In [158]:
landmarks.iloc[5]['answer']

'The meteor will move with constant velocity in a straight line.'

In [162]:
from numpy import dot
from numpy.linalg import norm

def cossim(a, b):
    return dot(a,b) / (norm(a) * norm(b))

for i in range(len(landmarks)):
    emb1 = get_embeddings(preprocess("2nd law"))
    emb2 = get_embeddings(preprocess(landmarks.iloc[i]['answer']))
    print(i, cossim(emb1, emb2), landmarks.iloc[i]['answer'])

0 0.20921653410389637 An object at rest will stay at rest and at object moving with constant velocity in a straight line will continue moving with constant velocity in a straight line as long as the net force acting on the object is zero.
1 0.21318218677528852 If all the forces acting on an object add up to zero (accounting for the direction of the forces and not just their magnitude) then the acceleration of that object also has to be zero. It can move at a constant speed but it cannot be accelerating.
2 0.1923720223360516 When the object is in equilibrium or velocity is constant, the sum of all forces will equal .
3 0.2137879921148141 When the rocket stops pushing, no forces are acting on the meteor anymore and therefore will move with constant velocity in a straight line.
4 0.18871505938040894 When the rocket stops pushing, the meteor will move with constant velocity in a straight line.
5 0.18486718771924596 The meteor will move with constant velocity in a straight line.
6 0.2266182

211 0.21318218677528852 If all the forces acting on an object add up to zero (accounting for the direction of the forces and not just their magnitude) then the acceleration of that object also has to be zero. It can move at a constant speed but it cannot be accelerating.
212 0.1923720223360516 When the object is in equilibrium or velocity is constant, the sum of all forces will equal
213 0.30761218189812006 Whenever one object exerts a force on a second object, the second object exerts an equal and opposite force on the first.
214 0.24353741133494422 The direction of the ball's acceleration is the same with that of the force.
215 0.24353741133494422 the direction of the ball's acceleration is the same with that of the force.
216 0.21937901963695156 Since the box is moving with a constant velocity, the net force on the box is zero and all the forces on the box balance.
217 0.20869994643289702 The box moves with constant velocity, so there no net force on the box
218 0.18910921153971655 

  """


 0.22854569340394196 THE SPEED OF THE BOX INCREASES. THE FORCE OF THE MOVER INCREASES AND THE FORCE OF FRICTION DECREASES
403 0.19316975518879168 the speed of the desk will double
404 0.2763614787458221 Yes, because an object at rest stays at rest unless acted on by a net force
405 0.10782653716959002 the rocket will move at constant velocity!!!
406 0.1432037739319602 The magnitudes of the forces will be equal.
407 0.3492468028257399 Newton's second law is helpful here. If the rocket pushes with a constant force, the meteor will move with a constant acceleration.
408 0.39771024151746365 Newton's second law; force equals mass times acceleration
409 0.21783681143010733 There is a downward force exerted on the puck by gravity. There is an upward normal force exerted on the puck from the ice.
410 0.3993335926657491 Newton's second law is that force is equal to mass times acceleration.
411 0.3143147992765658 The tension in the rope is equal to the mass of boy times gravity. Newton's second 

549 0.1386532908207386 the ball's velocity is constant
550 0.1442443638403039 the acceleration doubles
551 0.29869036376026276 AN OBJECT IN MOTION WILL STAY IN MOTION IF THERE IS NO NET FORCE ACTING
552 0.31693321584493256 Newton's first law is useful. The net force on the desk will be zero, seeing as the desk moves across a carpeted floor at a constant velocity.
553 0.1998514229294079 the force applied by the platers minus the kinetic frictional force
554 0.17455211088023936 THE FORCE OF THE MOSQUITO ON THE WIDSHIELD IS SMALLER THAN WINSHIELD ON THE MOSQUITO
555 0.16966782083398624 The mover, gravity, and friction are all acting on the box. The weight of the box and normal force are balancing each other out.
556 0.20032673355517186 The sum of forces along the x and y axis equal the net force acting on an object.
557 0.13535761641478666 The acceleration of the meteor is constant
558 0.1875818173763599 The tension of the rope is greater than the weight of the child
559 0.177398058021549

736 0.267784804049542 in the direction of the force acting on it
737 0.16492428053970717 acceleration is net force divided by mass
738 0.16159558550711758 the ball's velocity is decreasing
739 0.11076795431700394 The balls velocity is decreasing.
740 0.7056166330873999 2nd
741 0.23969880781887506 The net force on the object will not be zero when the mover doubles his force. He will no longer be moving at a constant velocity. His speed will increase.
742 0.22729585185378792 The action of the mosquito hitting the windshield has an equal and opposite reaction of the windshield hitting the mosquito.
743 0.13731333531178808 GRAVITY, THE PUSH, FRICTION AND THE Fn
744 0.1676600011622231 force is eqal to the weight
745 0.15052500507902095 THE METEOR WILL ACCELERATE AND THEN MOVE WITH A CONSTANT VELOCITY
746 0.21133033917316052 The forces acting on the ball are gravity and the normal force from the ball. The ball moves verticallt upward at a constant velocity.
747 0.17770741748103297 it will in

In [136]:
word2vec.wmdistance(phrase1, phrase2)

ModuleNotFoundError: No module named 'pyemd'

In [165]:
landmarks.head()

Unnamed: 0,pd_hash,qu_hash,label,answer
0,5059c9cfbb,27570f5d10,0,An object at rest will stay at rest and at object moving with constant velocity in a straight line will continue moving with constant velocity in a straight line as long as the net force acting on the object is zero.
0,5059c9cfbb,27570f5d10,0,If all the forces acting on an object add up to zero (accounting for the direction of the forces and not just their magnitude) then the acceleration of that object also has to be zero. It can move at a constant speed but it cannot be accelerating.
0,5059c9cfbb,27570f5d10,0,"When the object is in equilibrium or velocity is constant, the sum of all forces will equal ."
1,abb3be1545,c9b7df0ef8,0,"When the rocket stops pushing, no forces are acting on the meteor anymore and therefore will move with constant velocity in a straight line."
1,abb3be1545,c9b7df0ef8,0,"When the rocket stops pushing, the meteor will move with constant velocity in a straight line."


In [193]:
with open(os.path.join("munge", "landmarks.txt"), 'w') as f:
    f.write('pd_hash\tqu_hash\tlabel\tanswer\tembedding\n')
    for i in range(len(landmarks)):
        pd_hash = landmarks.iloc[i]['pd_hash']
        qu_hash = landmarks.iloc[i]['qu_hash']
        label = landmarks.iloc[i]['label']
        answer = landmarks.iloc[i]['answer']
        emb = get_embeddings(preprocess(landmarks.iloc[i]['answer']))
        emb = ','.join(map(str, emb))
        f.write("%s\t%s\t%s\t%s\t%s\n" %(pd_hash, qu_hash, label, answer, emb))

In [189]:
emb = get_embeddings(preprocess(landmarks.iloc[0]['answer']))
s= ','.join(map(str, emb))
type(s)

str

In [43]:
extract_reference_answers(landmarks_ra.iloc[0]['reference_answers'])

['An object at rest will stay at rest and at object moving with constant velocity in a straight line will continue moving with constant velocity in a straight line as long as the net force acting on the object is zero.',
 'If all the forces acting on an object add up to zero (accounting for the direction of the forces and not just their magnitude) then the acceleration of that object also has to be zero. It can move at a constant speed but it cannot be accelerating.',
 'When the object is in equilibrium or velocity is constant, the sum of all forces will equal .']

In [44]:
landmarks_ra['ra_exploded'] = landmarks_ra['reference_answers'].apply(extract_reference_answers)
landmarks_ra.head()

Unnamed: 0,pd_hash,qu_hash,reference_answers,label,ra_exploded
0,5059c9cfbb,27570f5d10,1: An object at rest will stay at rest and at...,1,[An object at rest will stay at rest and at ob...
1,abb3be1545,c9b7df0ef8,"1: When the rocket stops pushing, no forces a...",1,"[When the rocket stops pushing, no forces are ..."
2,d0331a86c9,37b9d25827,1: The desk will move with constant accelerat...,2,[The desk will move with constant acceleration...
3,a5b241c8bc,52e3695922,1: The ball is slowing down at a constant rat...,0,"[The ball is slowing down at a constant rate.,..."
4,d0331a86c9,64a1152f6d,1: The desk moves with increasing velocity. 2...,3,"[The desk moves with increasing velocity., The..."


In [46]:
landmarks_ra.iloc[0:1].explode('ra_exploded')

Unnamed: 0,pd_hash,qu_hash,reference_answers,label,ra_exploded
0,5059c9cfbb,27570f5d10,1: An object at rest will stay at rest and at...,1,An object at rest will stay at rest and at obj...
0,5059c9cfbb,27570f5d10,1: An object at rest will stay at rest and at...,1,If all the forces acting on an object add up t...
0,5059c9cfbb,27570f5d10,1: An object at rest will stay at rest and at...,1,When the object is in equilibrium or velocity ...


In [25]:
pd.__version__

'0.24.2'