In [38]:
# Import necessary libraries
from google.colab import drive
import pandas as pd
from IPython.display import display
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import joblib  # For saving and loading the label encoders and scaler
import numpy as np

# Mount Google Drive (if using Google Colab)
drive.mount('/content/drive')

# File paths
old_churches_1 = '/content/drive/MyDrive/OregonChurches/Old_Churches_Oregon_1.csv'
old_churches_2 = '/content/drive/MyDrive/OregonChurches/Old_Churches_Oregon_2.csv'
old_churches_3 = '/content/drive/MyDrive/OregonChurches/Old_Churches_Oregon_3.csv'
latest_churches = '/content/drive/MyDrive/OregonChurches/Churches_Latest.csv'

# Load the data
df_old_1 = pd.read_csv(old_churches_1)
df_old_2 = pd.read_csv(old_churches_2)
df_old_3 = pd.read_csv(old_churches_3)

# Clean column names
df_old_1.columns = df_old_1.columns.str.strip()
df_old_2.columns = df_old_2.columns.str.strip()
df_old_3.columns = df_old_3.columns.str.strip()

# Define common columns
common_columns = ['Business Name', 'Entity Type', 'Nonprofit Type', 'Registry Date',
                  'Associated Name Type', 'Address', 'City', 'State', 'Zip Code']

# Filter dataframes to common columns
common_columns_1 = [col for col in common_columns if col in df_old_1.columns]
common_columns_2 = [col for col in common_columns if col in df_old_2.columns]
common_columns_3 = [col for col in common_columns if col in df_old_3.columns]

df_old_1_filtered = df_old_1[common_columns_1]
df_old_2_filtered = df_old_2[common_columns_2]
df_old_3_filtered = df_old_3[common_columns_3]

# Merge old church data
df_old_merged = pd.concat([df_old_1_filtered, df_old_2_filtered, df_old_3_filtered], ignore_index=True)

# Load latest church data
df_latest = pd.read_csv(latest_churches)
df_latest.columns = df_latest.columns.str.strip()

# Rename columns in latest data to match
df_latest_renamed = df_latest.rename(columns={
    'NAME': 'Business Name',
    'STREET': 'Address',
    'CITY': 'City',
    'STATE': 'State',
    'ZIP': 'Zip Code',
})

# Filter latest data to common columns
df_latest_filtered = df_latest_renamed[[col for col in common_columns if col in df_latest_renamed.columns]]

# Filter to only churches
df_latest_filtered = df_latest_filtered[df_latest_filtered['Business Name'].str.contains('church', case=False, na=False)]

# Save the latest filtered data
df_latest_filtered_file = '/content/drive/MyDrive/OregonChurches/Churches_Latest.csv'
df_latest_filtered.to_csv(df_latest_filtered_file, index=False)

# Determine remaining churches and churches that no longer exist
remaining_churches = df_old_merged[df_old_merged['Business Name'].isin(df_latest_filtered['Business Name'])]
remaining_churches_cleaned = remaining_churches.drop_duplicates(subset=['Business Name'])
remaining_churches_file = '/content/drive/MyDrive/OregonChurches/Remaining_Churches_Cleaned.csv'
remaining_churches_cleaned.to_csv(remaining_churches_file, index=False)

churches_no_longer_exist = df_old_merged[~df_old_merged['Business Name'].isin(df_latest_filtered['Business Name'])]
churches_no_longer_exist_cleaned = churches_no_longer_exist.drop_duplicates(subset=['Business Name'])
no_longer_exist_file = '/content/drive/MyDrive/OregonChurches/Churches_No_Longer_Exist_Cleaned.csv'
churches_no_longer_exist_cleaned.to_csv(no_longer_exist_file, index=False)

# Read the cleaned data
remaining_churches = pd.read_csv('/content/drive/MyDrive/OregonChurches/Remaining_Churches_Cleaned.csv')
churches_no_longer_exist = pd.read_csv('/content/drive/MyDrive/OregonChurches/Churches_No_Longer_Exist_Cleaned.csv')

# Add 'Longevity' label
remaining_churches['Longevity'] = 1
churches_no_longer_exist['Longevity'] = 0

# Combine the data
df = pd.concat([remaining_churches, churches_no_longer_exist], ignore_index=True)

# Use separate label encoders for 'City' and 'State'
label_encoder_city = LabelEncoder()
df['City'] = df['City'].astype(str)  # Ensure 'City' is of type str
df['City_encoded'] = label_encoder_city.fit_transform(df['City'])

label_encoder_state = LabelEncoder()
df['State'] = df['State'].astype(str)  # Ensure 'State' is of type str
df['State_encoded'] = label_encoder_state.fit_transform(df['State'])

# Save the label encoders
joblib.dump(label_encoder_city, '/content/drive/MyDrive/OregonChurches/label_encoder_city.pkl')
joblib.dump(label_encoder_state, '/content/drive/MyDrive/OregonChurches/label_encoder_state.pkl')

# Prepare features and target
X = df[['City_encoded', 'State_encoded', 'Zip Code']]
y = df['Longevity']

# Split into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler
joblib.dump(scaler, '/content/drive/MyDrive/OregonChurches/scaler.pkl')

# Convert data to tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create datasets and data loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Define the neural network model
class ChurchLongevityNN(nn.Module):
    def __init__(self):
        super(ChurchLongevityNN, self).__init__()
        self.fc1 = nn.Linear(X_train_tensor.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

# Initialize the model, loss function, and optimizer
model = ChurchLongevityNN()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')

# Save the trained model
torch.save(model.state_dict(), '/content/drive/MyDrive/OregonChurches/church_longevity_model.pth')

# Inference function
def preprocess_input(city, state, zipcode, label_encoder_city, label_encoder_state, scaler):
    # Encode 'City' and 'State'
    city = str(city)
    state = str(state)
    try:
        city_encoded = label_encoder_city.transform([city])[0]
    except ValueError:
        print(f"City '{city}' not found in label encoder. Using default encoding.")
        city_encoded = -1  # Assign a default value

    try:
        state_encoded = label_encoder_state.transform([state])[0]
    except ValueError:
        print(f"State '{state}' not found in label encoder. Using default encoding.")
        state_encoded = -1  # Assign a default value

    # Prepare the input array
    input_array = np.array([[city_encoded, state_encoded, int(zipcode)]], dtype=np.float32)

    # Handle default encoding case
    # Replace -1 with mean encoding from training data (optional)
    if city_encoded == -1:
        input_array[0, 0] = X['City_encoded'].mean()
    if state_encoded == -1:
        input_array[0, 1] = X['State_encoded'].mean()

    # Scale the input
    input_scaled = scaler.transform(input_array)

    # Convert to tensor
    input_tensor = torch.tensor(input_scaled, dtype=torch.float32)

    return input_tensor

def predict_from_input(city, state, zipcode, model, label_encoder_city, label_encoder_state, scaler):
    # Preprocess the input
    input_tensor = preprocess_input(city, state, zipcode, label_encoder_city, label_encoder_state, scaler)

    # Perform inference
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        prediction = output.item()

    return prediction

# Load the label encoders and scaler
label_encoder_city = joblib.load('/content/drive/MyDrive/OregonChurches/label_encoder_city.pkl')
label_encoder_state = joblib.load('/content/drive/MyDrive/OregonChurches/label_encoder_state.pkl')
scaler = joblib.load('/content/drive/MyDrive/OregonChurches/scaler.pkl')

# Load the trained model
model = ChurchLongevityNN()
model.load_state_dict(torch.load('/content/drive/MyDrive/OregonChurches/church_longevity_model.pth'))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch [1/50], Loss: 0.6201
Epoch [2/50], Loss: 0.4569
Epoch [3/50], Loss: 0.3715
Epoch [4/50], Loss: 0.3630
Epoch [5/50], Loss: 0.3660
Epoch [6/50], Loss: 0.3608
Epoch [7/50], Loss: 0.3563
Epoch [8/50], Loss: 0.3612
Epoch [9/50], Loss: 0.3597
Epoch [10/50], Loss: 0.3695
Epoch [11/50], Loss: 0.3596
Epoch [12/50], Loss: 0.3596
Epoch [13/50], Loss: 0.3652
Epoch [14/50], Loss: 0.3733
Epoch [15/50], Loss: 0.3563
Epoch [16/50], Loss: 0.3691
Epoch [17/50], Loss: 0.3595
Epoch [18/50], Loss: 0.3653
Epoch [19/50], Loss: 0.3597
Epoch [20/50], Loss: 0.3557
Epoch [21/50], Loss: 0.3567
Epoch [22/50], Loss: 0.3643
Epoch [23/50], Loss: 0.3603
Epoch [24/50], Loss: 0.3560
Epoch [25/50], Loss: 0.3557
Epoch [26/50], Loss: 0.3554
Epoch [27/50], Loss: 0.3604
Epoch [28/50], Loss: 0.3646
Epoch [29/50], Loss: 0.3609
Epoch [30/50], Loss: 0.3655
Epoch [31/50], Loss: 0.3694
Epoch [32/50

  model.load_state_dict(torch.load('/content/drive/MyDrive/OregonChurches/church_longevity_model.pth'))


<All keys matched successfully>

In [42]:
city = input("Enter City: ")
zipcode = input("Enter ZIP code: ")

result = predict_from_input(city, "OR", zipcode, model, label_encoder_city, label_encoder_state, scaler)

print(f'Prediction for {city}, ZIP code {zipcode}: After 8 years, your church has the chance of {result} staying in business.')

Enter City: SPRINGFIELD
Enter ZIP code: 97477
Prediction for SPRINGFIELD, ZIP code 97477: After 8 years, your church has the chance of 0.18564321100711823 staying in business.


