In [1]:
import pandas as pd 
import pymongo

## Connecting to DB

In [2]:
# Open a connection to the local MongoDB server and select the database.
client = pymongo.MongoClient(host="localhost", port=27017)
db = client.get_database("legume-choice")
print(db.name)

# Handle on the collection that stores the project documents.
projectsData = db.get_collection("projects")

# An empty filter matches every document, so this prints the project count.
print(projectsData.count_documents({}))

legume-choice
3


## Extracting Data

In [3]:
# Extracting all projects: materialise the cursor into a list so the
# documents can be indexed and re-used by the cells below.
allProjects = list(projectsData.find())

# Example querying from the dataset: agro-ecological filters of the first
# project (raises IndexError if the collection is empty).
allProjects[0]["rawdata"]["agroEcoData"]


{'biofilters': [{'name': 'Rainfall (mm/year)',
   'label': 'rainfall',
   'minValue': 280,
   'maxValue': 4290,
   'value': 2175},
  {'name': 'Temperature (mean °C/month)',
   'label': 'temp',
   'minValue': 0,
   'maxValue': 45,
   'value': 14.5},
  {'name': 'Altitude (average masl)',
   'label': 'alt',
   'minValue': 0,
   'maxValue': 3800,
   'value': 1845},
  {'name': 'Soil pH (average)',
   'label': 'soilpH',
   'minValue': 4,
   'maxValue': 9,
   'value': 8.5}]}

## Extracting Agro Ecological Scores for all Projects

In [11]:
# Tabular AgroEco scores

def ExtractProjectAgroEcoData(project):
    """Flatten one project's agro-ecological filters into a single record.

    Parameters
    ----------
    project : dict
        Project document. The keys read are ``projectID``,
        ``rawdata -> projectInfo -> projectName`` and
        ``rawdata -> agroEcoData -> biofilters`` (a list of dicts that
        each carry a ``label`` and a ``value``).

    Returns
    -------
    dict
        ``projectID`` and ``projectName`` followed by one entry per
        biofilter, keyed by the biofilter's label.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing.
    """
    record = {"projectID": project["projectID"]}
    record["projectName"] = project["rawdata"]["projectInfo"]["projectName"]

    # One column per biofilter, named after its short label.
    for biofilter in project["rawdata"]["agroEcoData"]["biofilters"]:
        record[biofilter["label"]] = biofilter["value"]

    return record
def ExtractAllAgroEcoData(projects):
    """Build a table of agro-ecological values with one row per project.

    Parameters
    ----------
    projects : iterable of dict
        Project documents, each accepted by ``ExtractProjectAgroEcoData``.

    Returns
    -------
    pandas.DataFrame
        One row per project, built from the records returned by
        ``ExtractProjectAgroEcoData``.
    """
    records = [ExtractProjectAgroEcoData(project=entry) for entry in projects]
    return pd.DataFrame(records)




In [12]:
# Build the agro-ecological table for every loaded project; the bare name on
# the last line displays the resulting DataFrame as the cell output.
agroEcoData = ExtractAllAgroEcoData(allProjects)
agroEcoData

## Extracting all Data for Context Scores

In [59]:
def subsetScoresByVariables(scores, attributeLabel, participantLabel, typologyLabel, average):
    """Return the first score that matches the requested labels.

    Parameters
    ----------
    scores : iterable of dict
        Score entries. Each has a ``scoreType`` ("individual" or
        "average"), an ``attribute`` dict with a ``label`` and a ``score``;
        individual entries are also read for ``participant`` and
        ``typology`` labels.
    attributeLabel : str
        Attribute label to match.
    participantLabel : str
        Participant label to match (ignored when ``average`` is true).
    typologyLabel : str
        Typology label to match (ignored when ``average`` is true).
    average : bool
        ``False`` selects an individual score, ``True`` selects the
        attribute's average score.

    Returns
    -------
    The ``score`` value of the first matching entry.

    Raises
    ------
    ValueError
        If no entry matches, or ``average`` equals neither ``True`` nor
        ``False``.
    """

    def matchesIndividual(entry):
        return (entry["scoreType"] == "individual"
                and entry["attribute"]["label"] == attributeLabel
                and entry["participant"]["label"] == participantLabel
                and entry["typology"]["label"] == typologyLabel)

    def matchesAverage(entry):
        return (entry["scoreType"] == "average"
                and entry["attribute"]["label"] == attributeLabel)

    # Pick the predicate for the requested mode; anything that is neither
    # equal to False nor to True selects nothing and falls through to the error.
    if average == False:
        isMatch = matchesIndividual
    elif average == True:
        isMatch = matchesAverage
    else:
        isMatch = None

    if isMatch is not None:
        for entry in scores:
            if isMatch(entry):
                return entry["score"]

    raise ValueError("Did not identify correct subset")
    

def ScoresForAllAttributes(scores, attributes, participant, typology, average):
    """Look up one score per attribute for a participant/typology pair.

    Parameters
    ----------
    scores : iterable of dict
        Score entries, passed through to ``subsetScoresByVariables``.
    attributes : iterable of dict
        Attributes to report, each with a ``label``.
    participant : dict
        Participant whose ``label`` is used for the lookup.
    typology : dict
        Typology whose ``label`` is used for the lookup.
    average : bool
        Passed through to ``subsetScoresByVariables``.

    Returns
    -------
    list
        The matching scores, in the same order as ``attributes``.
    """
    return [
        subsetScoresByVariables(
            scores,
            attribute["label"],
            participant["label"],
            typology["label"],
            average,
        )
        for attribute in attributes
    ]


def projectContextScores(project):
    """Collect one project's context scores as column-oriented data.

    Parameters
    ----------
    project : dict
        Project document. The keys read are ``projectID``,
        ``rawdata -> projectInfo -> projectName`` and, under
        ``rawdata -> contextScores``: ``scores``, ``attributes``,
        ``participants`` and ``typologies``.

    Returns
    -------
    dict
        Maps column name to a list with one entry per attribute. Columns
        are ``projectName``, ``projectID``, one
        ``t_<typology>_<participant>`` column per typology/participant
        pair (typology varying slowest) and ``average``.

    Raises
    ------
    KeyError
        If an expected key is missing from the project document.
    ValueError
        Propagated from the score lookup when a score cannot be found.
    """
    # Initialising variables
    projectID = project["projectID"]
    projectName = project["rawdata"]["projectInfo"]["projectName"]

    contextScores = project["rawdata"]["contextScores"]
    allScores = contextScores["scores"]
    attributes = contextScores["attributes"]
    participants = contextScores["participants"]
    typologies = contextScores["typologies"]

    # Identification columns are repeated once per attribute (i.e. per row).
    scoresToReturn = {
        "projectName": [projectName for _ in attributes],
        "projectID": [projectID for _ in attributes],
    }

    # Build each column name next to the data it labels, rather than walking
    # a separately built list of names with a manual counter.
    for typology in typologies:
        for participant in participants:
            columnName = "t_" + typology["label"] + "_" + participant["label"]
            scoresToReturn[columnName] = ScoresForAllAttributes(
                scores=allScores,
                attributes=attributes,
                participant=participant,
                typology=typology,
                average=False,
            )

    # Average scores are matched on the attribute alone, so participant and
    # typology are placeholders here. Indexing participants[1] for both (as
    # this function previously did) raised IndexError for projects with fewer
    # than two participants.
    unusedFilter = {"label": None}
    scoresToReturn["average"] = ScoresForAllAttributes(
        scores=allScores,
        attributes=attributes,
        participant=unusedFilter,
        typology=unusedFilter,
        average=True,
    )
    return scoresToReturn

def ContextScoresAllProjectsList(projects):
    """Compute the context-score columns of every project.

    Parameters
    ----------
    projects : iterable of dict
        Project documents, each accepted by ``projectContextScores``.

    Returns
    -------
    list
        One ``projectContextScores`` result per project, in input order.
    """
    return [projectContextScores(project=entry) for entry in projects]

def ContextScoresAllProjects(projects):
    """Combine the context scores of several projects into one table.

    Parameters
    ----------
    projects : iterable of dict
        Project documents, each accepted by ``projectContextScores``.

    Returns
    -------
    pandas.DataFrame
        One row per project attribute, with the per-project tables stacked
        in input order. Each project's rows keep their own 0-based index.
        An empty DataFrame is returned when ``projects`` is empty.
    """
    # Use the argument, not the notebook-level `allProjects` variable, so the
    # function works on whatever collection the caller passes in.
    contextScoresList = ContextScoresAllProjectsList(projects)

    if not contextScoresList:
        return pd.DataFrame()

    # Every project (including the single-project case) is converted the same
    # way so the result always has one row per attribute.
    frames = [pd.DataFrame(scoreItem) for scoreItem in contextScoresList]

    # DataFrame.append was removed in pandas 2.0; concat stacks all frames in
    # one step and, like append, keeps each frame's original index.
    return pd.concat(frames)


        

In [60]:
# Combine the context scores of every loaded project into one table; as the
# last expression of the cell, the result is shown as the cell output.
ContextScoresAllProjects(projects=allProjects)


Unnamed: 0,projectName,projectID,t_low_farmer,t_low_expert,t_med_farmer,t_med_expert,t_high_farmer,t_high_expert,average
0,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,1,2,3,1,4,2,2.166667
1,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,2,3,1,2,4,4,2.666667
2,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,1,2,4,1,2,4,2.333333
3,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,2,3,4,4,4,4,3.5
4,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,4,1,4,0,3,4,2.666667
5,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,1,1,0,0,1,0,0.5
6,wqeaafd,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,3,0,4,0,0,0,1.166667
0,Project 2,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,3,2,4,1,4,2,2.666667
1,Project 2,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,1,1,1,2,4,4,2.166667
2,Project 2,d4e74b50-6b07-4bb2-9be7-af7f52e4947d,4,0,4,1,0,4,2.166667


## Extracting Data for PairWise Rankings

In [None]:
# TODO: the pairwise-ranking extraction is not written yet. `allProjects` is a
# list of project documents, so a project has to be selected by position
# before "rawdata" can be read. Listing the keys of the first project's
# "rawdata" shows which sections are available to extract from.
list(allProjects[0]["rawdata"].keys())