# Setup

In [1]:
import pandas as pd
!pip install gspread oauth2client
import numpy as np



In [2]:
# Connect to google drive
# Colab-only step: mounts the user's Drive under /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Authenticate the Colab user before the credentials are requested below
from google.colab import auth
auth.authenticate_user()

# Default credentials, handed to gspread.authorize() in the next cell;
# the second value returned by default() is not used and is discarded
import gspread
from google.auth import default
creds, _ = default()

Mounted at /content/drive


In [3]:
# Get spreadsheet from google drive
SURVEY_SPREADSHEET_TITLE = 'Team 3 survey responses'

# Authorise a gspread client with the credentials from the previous cell,
# then open the survey workbook by its title
gc = gspread.authorize(creds)
spreadsheet = gc.open(SURVEY_SPREADSHEET_TITLE)

# First worksheet of the workbook, read in full (header row first)
worksheet = spreadsheet.sheet1
data = worksheet.get_all_values()

## Reading spreadsheet into dataframe

In [4]:
# Full survey question text -> short column label, private property section only.
# The insertion order of this mapping is the column order of the final dataframe.
newNames = {
    "What are your overall thoughts about foraging on private property?": "Overall thoughts",
    "Have you ever harvested from trees inside private property?": "Harvested on private property?",
    "If a fruit tree isn’t accessible from the street, how comfortable would you feel walking onto someone’s property to harvest assuming it is marked as okay to pick?": "How comfortable foraging on PP with permission?",
    "Please explain your choice.": "Explain how comfortable",
    "What are some benefits or concerns about harvesting from private trees?": "Benefits/concerns",
    "If there was a way to see information about fruit trees on private property (that are okay to forage from), what kind of details would you want to see?": "What kinds of information about private trees should be included?",
    "Do you have fruit-bearing trees on your property?": "Have private trees?",
    "Are your trees directly accessible from the street or sidewalk?": "Street accessible private trees?",
    "Would you be open to letting people pick from your trees if there are ways to set clear boundaries?": "Open to allowing foragers if can set boundaries?",
    "If you selected no, please briefly explain why.": "Explain no answer",
    "Have you ever resorted to your own methods to encourage or discourage harvesting fruit from your trees?": "Use own methods to encourage or discourage foraging your trees?",
    "Have you ever had foragers pick from your tree?": "Had foragers pick from your tree?",
    "If you selected yes to the previous question, how did it go?": "If yes, how did it go?",
    "If there was a feature to share information about your fruit trees with potential harvesters, what kind of details would you want to include?": "What tree details should be shared with harvesters?",
}

# Isolating private property section: the questions we keep are exactly
# the ones that have a short label above
ourQs = list(newNames)

# The first sheet row is assumed to be the header; the rest are responses
header_row = data[0]
response_rows = data[1:]
df = pd.DataFrame(response_rows, columns=header_row)

# Keep only our section, then rename columns to the short labels
df = df[ourQs].rename(columns=newNames)

In [5]:
# Standardizing responses

# Every non-numeric column is trimmed and lower-cased so that answers
# differing only by case or surrounding whitespace compare equal
nonNumCols = list(df.select_dtypes(exclude=['number']).columns)
df[nonNumCols] = df[nonNumCols].apply(
    lambda answers: answers.str.strip().str.lower()
)

# Replace empty (or whitespace-only) entries with NaN
df = df.replace(r'^\s*$', np.nan, regex=True)

## Split into forager and owner dataframes

In [6]:
# Column labels that make up each of the two views
foragerCols = [
    "Overall thoughts",
    "Harvested on private property?",
    "How comfortable foraging on PP with permission?",
    "Explain how comfortable",
    "Benefits/concerns",
    "What kinds of information about private trees should be included?",
    "Have private trees?",
]
ownerCols = [
    "Street accessible private trees?",
    "Open to allowing foragers if can set boundaries?",
    "Explain no answer",
    "Use own methods to encourage or discourage foraging your trees?",
    "Had foragers pick from your tree?",
    "If yes, how did it go?",
    "What tree details should be shared with harvesters?",
]

# Foragers dataframe: all respondents, general foraging columns only
foragers_df = df.loc[:, foragerCols].copy()

# Owners dataframe: only respondents whose "Have private trees?" answer is
# exactly "yes" (responses were lower-cased earlier), owner columns only
hasPrivateTrees = df["Have private trees?"] == "yes"
owners_df = df.loc[hasPrivateTrees, ownerCols].copy()

## Helpers

In [7]:
# Words dropped from free-text answers (matching is exact and case-sensitive)
stopwords = {"i", "the", "to", "a", "its", "it", "and", "it's"}


def removeStopwords(text):
    """Return `text` with every whitespace-separated stopword removed.

    Missing values (anything `pd.isna` reports as missing) are returned
    unchanged. The remaining words are re-joined with single spaces, so
    runs of whitespace in the input collapse.
    """
    if pd.isna(text):
        return text
    kept_words = (token for token in text.split() if token not in stopwords)
    return " ".join(kept_words)

# Organization guide
**General foraging questions**
1. Overall thoughts  *
2. Harvested on private property?
3. How comfortable foraging on PP with permission? + Explain how comfortable  *
4. Benefits/concerns  *
5. What kinds of information about private trees should be included?  *
---
**Tree owner questions**
6. Have private trees?
7. Street accessible private trees?
8. Open to allowing foragers if can set boundaries? + Explain no answer  *
9. Use own methods to encourage or discourage foraging your trees?
10. Had foragers pick from your tree? + If yes, how did it go?  *
11. What tree details should be shared with harvesters?  *

multi-select: 5, 11

ranking: 3

qualitative: 1, 4

sort binary answers: 8, 10

---


# Cleaning Q5 & Q11

In [16]:
#Q5
Q5 = "What kinds of information about private trees should be included?"

# Raw multi-select answers: one comma-separated string per respondent.
# Unanswered cells are NaN after the blank-to-NaN standardization step.
Q5_filtered = foragers_df[Q5]
Q5_filteredDF = Q5_filtered.to_frame(name=Q5)

# Split by commas and trim whitespace.
# NaN is a float and has no .split(), so unanswered rows are dropped before
# splitting; empty pieces left by stray commas ("a,,b", "a,") are discarded.
Q5_split = Q5_filtered.dropna().apply(
    lambda answer: [piece.strip() for piece in answer.split(',') if piece.strip()]
)

# Explode into separate rows (one selected option per row); an answer that
# held nothing but commas explodes to NaN and is dropped here
Q5_exploded = Q5_split.explode().dropna().reset_index(drop=True)

# Turn value counts into DataFrame with reset_index
Q5_counts_df = Q5_exploded.value_counts().reset_index()

# Display
# Q5_counts_df

In [19]:
#Q11
Q11 = "What tree details should be shared with harvesters?"

# Raw multi-select answers from tree owners: one comma-separated string per
# respondent. Unanswered cells are NaN after the blank-to-NaN standardization.
Q11_col = owners_df[Q11]
Q11_DF = Q11_col.to_frame(name=Q11)

# Split by commas and trim whitespace.
# NaN is a float and has no .split(), so unanswered rows are dropped before
# splitting; empty pieces left by stray commas ("a,,b", "a,") are discarded.
Q11_split = Q11_col.dropna().apply(
    lambda answer: [piece.strip() for piece in answer.split(',') if piece.strip()]
)

# Explode into separate rows (one selected option per row); an answer that
# held nothing but commas explodes to NaN and is dropped here
Q11_exploded = Q11_split.explode().dropna().reset_index(drop=True)

# Turn value counts into DataFrame with reset_index
Q11_counts_df = Q11_exploded.value_counts().reset_index()

# Display
# Q11_counts_df

# Cleaning Q3

In [27]:
Q3_1 = "How comfortable foraging on PP with permission?"
Q3_2 = "Explain how comfortable"

# Comfort rating together with its free-text explanation
Q3_DF = foragers_df.loc[:, [Q3_1, Q3_2]]

# Group rows by the rating given in Q3_1
Q3_grouped = Q3_DF.groupby(by=Q3_1)

# Same rows ordered by rating, with a fresh 0..n-1 index.
# NOTE(review): if the ratings are stored as text, this ordering is
# lexicographic ("10" sorts before "2") - confirm the scale is single-digit.
Q3_sorted = Q3_DF.sort_values(by=Q3_1)
Q3_groupedDF = Q3_sorted.reset_index(drop=True)

# Display
# Q3_groupedDF

# Cleaning Q1 & Q4 (nvm, Ashley got it)

In [28]:

# Q1 - Overall thoughts
# Strip stopwords from each answer, discard unanswered (NaN) rows, and wrap
# the result in a one-column dataframe. Series.dropna() ignores `how`, so it
# is called without it.
Q1_filtered = (
    foragers_df["Overall thoughts"]
    .apply(removeStopwords)
    .dropna()
    .to_frame(name="Overall thoughts")
)

# Q4 - Benefits/concerns
# Same treatment as Q1
Q4_filtered = (
    foragers_df["Benefits/concerns"]
    .apply(removeStopwords)
    .dropna()
    .to_frame(name="Benefits/concerns")
)