In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Make the folder that holds the survey files the current working directory,
# so the relative file names used by the read/write cells resolve against it.
# Replace the placeholder with your own folder path.
os.chdir(path='path_to_your_data')

In [None]:
# Load the survey responses into `data`, the DataFrame that the transformation
# cells read from and add their coded columns to.
# pandas is already imported as `pd` in the notebook's import cell.
#
# encoding='ISO-8859-1' is passed explicitly to avoid a UnicodeDecodeError when
# reading the file. Note that this codec maps every byte to a code point in
# U+0000-U+00FF, so typographic characters (e.g. curly quotes) will not appear
# as their usual Unicode characters in the loaded text.
data = pd.read_csv('path_to_your_data.csv', encoding='ISO-8859-1')

In survey data analysis, categorical variables often need to be transformed into numerical values before they can be used in statistical analyses or machine learning models. This transformation is particularly important when working with methods like Principal Component Analysis (PCA), which require numerical input.

**Example: Transforming Irrigation Methods**

In this step, we transform the categorical variable `Irrigation method` into a new numerical variable, `Irrigation Method Coded`. The transformation uses a predefined mapping dictionary in which each category, or combination of categories, is assigned a unique numeric code.

Mapping Explanation:

'CENTER-PIVOTS' is assigned the code 1.

'DRIP' is assigned the code 2.

'FURROW/FLOOD' is assigned the code 3.

Combinations of methods like 'CENTER-PIVOTS, DRIP' are given unique codes (4, 5, 6, etc.) to account for cases where respondents use multiple methods.

In [None]:
# Numeric codes for the 'Irrigation method' answers. Codes are handed out in
# list order starting at 1: the three single methods first, then the
# multi-method combinations.
# Keys must match the raw answers character for character.
# NOTE(review): the first combination has a space after the comma while the
# other two do not -- confirm this mirrors the spelling in the survey file.
irrigation_mapping = {
    answer: code
    for code, answer in enumerate(
        [
            'CENTER-PIVOTS',
            'DRIP',
            'FURROW/FLOOD',
            'CENTER-PIVOTS, DRIP',
            'CENTER-PIVOTS,FURROW/FLOOD',
            'DRIP,FURROW/FLOOD',
        ],
        start=1,
    )
}
# Answers that are not keys of the mapping become NaN in the coded column.
data['Irrigation Method Coded'] = data['Irrigation method'].map(irrigation_mapping)

**Below are additional examples that demonstrate the flexibility of this approach to transforming your data.**

You can use them as a guide for transforming your own survey data.

In [None]:
# Numeric codes for the 'Source of irrigation water' answers, numbered 1-5 in
# the order listed; the last two entries are answers naming several sources.
water_source_mapping = dict(
    zip(
        (
            'GROUNDWATER',
            'SURFACE',
            'DRYLAND',
            'SURFACE AND GROUND',
            'SURFACE AND GROUND, DRYLAND',
        ),
        range(1, 6),
    )
)
# Answers without a matching key become NaN in the coded column.
data['Water Source Coded'] = data['Source of irrigation water'].map(water_source_mapping)

In [None]:
# Numeric codes for the 'Describe land ownership' answers: codes 1-4 are the
# single ownership types, codes 5-9 are the combined answers.
land_ownership_mapping = dict([
    ('Individual', 1),
    ('Rent', 2),
    ('Lease', 3),
    ('Co-own', 4),
    ('Individual, Rent', 5),
    ('Individual, Lease', 6),
    ('Rent, Lease', 7),
    ('Individual, Co-own', 8),
    ('Individual, Co-own, Rent', 9),
])
# Answers without a matching key become NaN in the coded column.
data['Land Ownership Coded'] = data['Describe land ownership'].map(land_ownership_mapping)

In [None]:
# Transform years in operation into an ordinal code: one distinct, increasing
# code per bracket so that the numeric order follows the bracket order.
# (Previously '20-25Y' and '25-30Y' shared code 3, which merged two brackets.)
# NOTE(review): there is no key for operations younger than 10 years; such
# answers, like any other unmapped value, become NaN -- confirm against the
# answer options in your survey.
years_op_mapping = {
    '10-15Y': 1,
    '15-20Y': 2,
    '20-25Y': 3,
    '25-30Y': 4,
    '>30Y': 5,
}
data['Years Op Coded'] = data['Years in operation'].map(years_op_mapping)

In [None]:
# Numeric codes for the farm-size brackets (acres), numbered 1-4 from the
# smallest bracket to the largest.
farm_size_mapping = {
    bracket: code
    for code, bracket in enumerate(['1-100', '100-500', '500-1000', '>1000'], start=1)
}
# Answers without a matching key become NaN in the coded column.
data['Farm size Coded'] = (
    data['Total acres of agricultural land owned/leased'].map(farm_size_mapping)
)

In [None]:

# Numeric codes for the 'Pass to future generations' answers.
# 'UNKNOWN' is coded 0, 'NO' is 1 and 'YES' is 2.
future_gen_mapping = dict([
    ('UNKNOWN', 0),
    ('NO', 1),
    ('YES', 2),
])
# Answers without a matching key become NaN in the coded column.
data['Future plans Coded'] = data['Pass to future generations'].map(future_gen_mapping)

In [None]:
# Numeric codes for membership in farming organizations: 'NO' is 1, 'YES' is 2.
member_org_mapping = dict(zip(('NO', 'YES'), (1, 2)))
# Answers without a matching key become NaN in the coded column.
data['Membership Coded'] = data['Member farming organizations'].map(member_org_mapping)

In [None]:

# Numeric codes for what determines the irrigation amount. Each distinct
# answer string (single factor or comma-joined combination) gets its own code,
# numbered 1-13 in the order listed.
# Keys must match the raw answers character for character.
# NOTE(review): 'SOIL MOISTURE, OTHER' is the only key with a space after the
# comma -- confirm this mirrors the spelling in the survey file.
decision_irr_amount_mapping = {
    answer: code
    for code, answer in enumerate(
        [
            'CONSULTANTS',
            'CROP CONDITIONS',
            'CROP CONDITIONS,CONSULTANTS',
            'CROP CONDITIONS,REGULATIONS',
            'SOIL MOISTURE,REGULATIONS',
            'SOIL MOISTURE,CROP CONDITIONS,OTHER',
            'SOIL MOISTURE,CROP CONDITIONS,CONSULTANTS',
            'SOIL MOISTURE,CROP CONDITIONS',
            'SOIL MOISTURE, OTHER',
            'SOIL MOISTURE',
            'SAME AMOUNT,CROP CONDITIONS',
            'SAME AMOUNT',
            'OTHER',
        ],
        start=1,
    )
}
# Answers without a matching key become NaN in the coded column.
data['dec_irr_amount Coded'] = data['Irrigation amount_new'].map(decision_irr_amount_mapping)

**Explanation for Transforming Likert Scale for Behavior Variables**

What should you do if your survey includes Likert-scale questions? Responses to these questions are often categorical and need to be transformed into numerical values for quantitative analysis.

The step below involves converting the Likert scale responses for behavior-related questions into numerical codes that represent the strength or likelihood of the responses. 

This transformation ensures the data is ready for statistical analyses such as regression, clustering, or factor analysis.

There are several examples below that you can tailor to suit your survey data.

In [None]:
# Likert scale for the behavior questions, coded 1 (least likely) to
# 4 (most likely).
behavior_mapping = {
    answer: code
    for code, answer in enumerate(
        ['EXTREMELY UNLIKELY', 'NOT LIKELY', 'LIKELY', 'EXTREMELY LIKELY'],
        start=1,
    )
}

# Source columns in item order: position N in this list feeds the new column
# 'BehaviorN Coded'. Answers without a matching key become NaN.
# NOTE(review): items 7 and 8 read the same source column, so 'Behavior8 Coded'
# is an exact copy of 'Behavior7 Coded'. This looks like a copy-paste slip --
# confirm which survey question item 8 should use.
for item_number, question in enumerate(
    [
        'Change your irrigation system',
        'Irrigate less frequently',
        'Change time of day for irrigation',
        'Switch to dryland farming',
        'Turn off end guns',
        'Plant less water intensive crops',
        'Change start of planting season',
        'Change start of planting season',
        'Stop farming',
        'Make no change',
    ],
    start=1,
):
    data[f'Behavior{item_number} Coded'] = data[question].map(behavior_mapping)

In [None]:
# Transform the 5-point agreement Likert scale. The same mapping is applied to
# every attitude, social-norm, perceived-control, moral, risk-perception and
# trust item listed below.
#
# "Don't know" is listed under three spellings of the apostrophe. A file
# decoded as ISO-8859-1 can only contain code points U+0000-U+00FF, so the
# typographic apostrophe (U+2019) alone would never match such data: a
# Windows-1252 curly apostrophe arrives as U+0092 ('\x92'), and a plain ASCII
# apostrophe may also occur. Without these keys the answers silently become NaN.
# NOTE(review): code 0 places "Don't know" below 'STRONGLY DISAGREE' on the
# numeric scale -- confirm that is intended, or treat these answers as missing.
likert_mapping = {
    'STRONGLY DISAGREE': 1,
    'SOMEWHAT DISAGREE': 2,
    'NEUTRAL': 3,
    'SOMEWHAT AGREE': 4,
    'STRONGLY AGREE': 5,
    'DON’T KNOW': 0,     # typographic apostrophe (U+2019)
    "DON'T KNOW": 0,     # ASCII apostrophe
    'DON\x92T KNOW': 0,  # Windows-1252 apostrophe byte decoded as ISO-8859-1
}

# New coded column -> survey question (source column) it is derived from.
likert_items = {
    'Attitude1 Coded': 'Groundwater is a valuable resource to me and my farm',
    'Attitude2 Coded': 'There is currently a groundwater shortage in my area',
    'Attitude3 Coded': 'I have a role to play in conserving groundwater resources',
    'Attitude4 Coded': 'Conserving groundwater resources can benefit the environment',
    'Attitude5 Coded': 'Conserving groundwater resources can increase my farm income',
    'Attitude6 Coded': 'I have a responsibility to report or stop any groundwater wasting actions that I encounter',
    'Attitude7 Coded': 'The actions that I and my family take can make a difference in conserving groundwater resources',
    'Attitude8 Coded': 'Conserving groundwater has more advantages than disadvantages for me and my farm',
    'Attitude9 Coded': 'Conserving groundwater will help counteract the impact of a changing climate',
    'SNorms1 Coded': 'My community is currently advocating for groundwater protection',
    'SNorms2 Coded': 'I will save water and irrigate only when necessary because government agencies encourage me to do so',
    'SNorms3 Coded': 'I will save water and irrigate only when necessary because my fellow farmers would approve',
    'SNorms4 Coded': 'My family members would approve of me trying to save water',
    'PBC1 Coded': 'I have sufficient knowledge about the state of groundwater on my farm',
    'PBC2 Coded': 'I am familiar with groundwater conservation or sustainability practices',
    'PBC3 Coded': 'My current irrigation system enables me to save or conserve water',
    'PBC4 Coded': 'I have sufficient resources to adopt water saving techniques',
    'PBC5 Coded': 'I have the freedom to choose whether to change my current irrigation system or not',
    'Moral1 Coded': 'I would feel bad if I did not take steps to conserve groundwater',
    'Moral2 Coded': 'When I implement groundwater conservation measures I feel like a better farmer',
    'Moral3 Coded': 'I feel morally obligated to conserve groundwater',
    'Moral4 Coded': 'Overuse of groundwater and natural resources is against my principles of environmental protection',
    'RiskP1 Coded': 'Climate change is not a big issue because human ingenuity will enable us to adapt to changes',
    'RiskP2 Coded': 'I believe that extreme weather events will happen more frequently in the future',
    'RiskP3 Coded': 'My farm operation will likely be harmed by climate change',
    'RiskP4 Coded': 'In the past 5 years I have noticed more unusual or variable weather on my farm',
    'RiskP5 Coded': 'In the past 5 years I have noticed more unusual or variable weather across the state or region',
    'RiskP6 Coded': 'Extreme weather events in recent years have affected my long-term farm management goals',
    'Trust1 Coded': 'Government institutions are influenced by industry and private interests in their decisions about groundwater',
    'Trust2 Coded': 'Government institutions have sufficient expertise to make decisions regarding groundwater use',
    'Trust3 Coded': 'Government institutions provide sufficient information to the public when making decisions about groundwater conservation',
}

# Columns are added in the order listed; answers that match no key become NaN.
for coded_column, question in likert_items.items():
    data[coded_column] = data[question].map(likert_mapping)

In [None]:
# Transform the 4-point concern scale used by the climate change perception
# items. The same mapping is applied to every item listed below.
#
# "Don't know" is listed under three spellings of the apostrophe. A file
# decoded as ISO-8859-1 can only contain code points U+0000-U+00FF, so the
# typographic apostrophe (U+2019) alone would never match such data: a
# Windows-1252 curly apostrophe arrives as U+0092 ('\x92'), and a plain ASCII
# apostrophe may also occur. Without these keys the answers silently become NaN.
# NOTE(review): code 0 places "Don't know" below 'NOT CONCERNED' on the
# numeric scale -- confirm that is intended, or treat these answers as missing.
likert1_mapping = {
    'NOT CONCERNED': 1,
    'SLIGHTLY CONCERNED': 2,
    'CONCERNED': 3,
    'VERY CONCERNED': 4,
    'DON’T KNOW': 0,     # typographic apostrophe (U+2019)
    "DON'T KNOW": 0,     # ASCII apostrophe
    'DON\x92T KNOW': 0,  # Windows-1252 apostrophe byte decoded as ISO-8859-1
}
data['CCPerception1 Coded'] = data['Increased flooding'].map(likert1_mapping)
data['CCPerception2 Coded'] = data['Longer dry periods and drought'].map(likert1_mapping)
data['CCPerception3 Coded'] = data['More frequent extreme rains'].map(likert1_mapping)
data['CCPerception4 Coded'] = data['Increases in saturated soils and ponded water'].map(likert1_mapping)
data['CCPerception5 Coded'] = data['Increased heat stress on crops'].map(likert1_mapping)
data['CCPerception6 Coded'] = data['Increased soil erosion'].map(likert1_mapping)


In [None]:
# Once transformed, check the first rows of the data.
# Leaving the frame as the cell's last expression lets Jupyter render it as a
# table; print() produces a plain-text dump that elides the middle columns of
# a frame this wide.
data.head()

In [None]:
# List every column name to confirm the coded columns were added.
# The Index repr is abbreviated with '...' once it holds more items than
# pandas' display.max_seq_items option allows; a plain list is shown in full.
data.columns.tolist()

In [None]:
# Write the frame, including the newly added coded columns, to a new CSV file.
# index=False leaves the row index out of the file.
data.to_csv(path_or_buf='your_coded_data.csv', index=False)

Next step: PCA. Start by running tests to confirm that your data are suitable for PCA.