In [48]:
from google.colab import drive
import pandas as pd
from IPython.display import display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Attach Google Drive so the CSV files under MyDrive are readable from Colab.
drive.mount('/content/drive')

# Paths of the three older church exports plus the most recent listing.
old_churches_1, old_churches_2, old_churches_3, latest_churches = (
    '/content/drive/MyDrive/OregonChurches/Old_Churches_Oregon_1.csv',
    '/content/drive/MyDrive/OregonChurches/Old_Churches_Oregon_2.csv',
    '/content/drive/MyDrive/OregonChurches/Old_Churches_Oregon_3.csv',
    '/content/drive/MyDrive/OregonChurches/Churches_Latest.csv',
)

# Only the older exports are loaded here; `latest_churches` is just a path at this point.
df_old_1, df_old_2, df_old_3 = (
    pd.read_csv(csv_path)
    for csv_path in (old_churches_1, old_churches_2, old_churches_3)
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [49]:
# Strip stray whitespace from the headers so column lookups by name are reliable.
df_old_1.columns = df_old_1.columns.str.strip()
df_old_2.columns = df_old_2.columns.str.strip()
df_old_3.columns = df_old_3.columns.str.strip()

# Columns carried through the analysis; a frame keeps only the ones it actually has.
common_columns = ['Business Name', 'Entity Type', 'Nonprofit Type', 'Registry Date',
                  'Associated Name Type', 'Address', 'City', 'State', 'Zip Code']

common_columns_1 = [col for col in common_columns if col in df_old_1.columns]
common_columns_2 = [col for col in common_columns if col in df_old_2.columns]
common_columns_3 = [col for col in common_columns if col in df_old_3.columns]

# Stack the three older exports into one frame with a fresh 0..n-1 index.
df_old_1_filtered = df_old_1[common_columns_1]
df_old_2_filtered = df_old_2[common_columns_2]
df_old_3_filtered = df_old_3[common_columns_3]
df_old_merged = pd.concat([df_old_1_filtered, df_old_2_filtered, df_old_3_filtered], ignore_index=True)

# Load the latest listing and map its upper-case headers onto the shared names.
df_latest = pd.read_csv(latest_churches)
df_latest.columns = df_latest.columns.str.strip()
df_latest_renamed = df_latest.rename(columns={
    'NAME': 'Business Name',
    'STREET': 'Address',
    'CITY': 'City',
    'STATE': 'State',
    'ZIP': 'Zip Code',
})

# Keep the shared columns, then only rows whose name contains "church"
# (case-insensitive; rows with a missing name are dropped by na=False).
df_latest_filtered = df_latest_renamed[[col for col in common_columns if col in df_latest_renamed.columns]]
df_latest_filtered = df_latest_filtered[df_latest_filtered['Business Name'].str.contains('church', case=False, na=False)]

# Write the derived subset to its OWN file. Saving it to `latest_churches`
# (Churches_Latest.csv) would replace the raw input with this renamed,
# column-reduced, church-only subset, so the original data would be lost and
# every later run would read the derived file instead of the source.
df_latest_filtered_file = '/content/drive/MyDrive/OregonChurches/Churches_Latest_Filtered.csv'
df_latest_filtered.to_csv(df_latest_filtered_file, index=False)

display(df_old_merged.head())
display(df_latest_filtered.head())

Unnamed: 0,Business Name,Entity Type,Nonprofit Type,Registry Date,Associated Name Type,Address,City,State,Zip Code
0,THURSTON CHRISTIAN CHURCH,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1892-12-01 00:00:00,PRINCIPAL PLACE OF BUSINESS,6680 THURSTON RD,SPRINGFIELD,OR,97478
1,CENTENNIAL CHRISTIAN CENTER,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1897-04-23 00:00:00,PRINCIPAL PLACE OF BUSINESS,1020 JANUS ST,SPRINGFIELD,OR,97477
2,"FIRST BAPTIST CHURCH OF SPRINGFIELD, OREGON",DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1914-11-07 00:00:00,PRINCIPAL PLACE OF BUSINESS,1175 G ST,SPRINGFIELD,OR,97477
3,NEW LIFE CHURCH FIRST ASSEMBLY OF GOD SPRINGFIELD,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1940-12-30 00:00:00,PRINCIPAL PLACE OF BUSINESS,2080 N 19TH ST,SPRINGFIELD,OR,97477
4,FIRST CHRISTIAN CHURCH OF SPRINGFIELD,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1942-09-24 00:00:00,PRINCIPAL PLACE OF BUSINESS,395 W CENTENNIAL BLVD,SPRINGFIELD,OR,97477


Unnamed: 0,Business Name,Address,City,State,Zip Code
0,HILLSBORO VINEYARD CHURCH,511 SW 211TH AVE,BEAVERTON,OR,97006-6420
1,SKYLINE BAPTIST CHURCH,4787 SKYLINE RD S,SALEM,OR,97306-2404
2,VERTICAL CHURCH,PO BOX 707,LA PINE,OR,97739-0707
3,CREEKSIDE COMMUNITY CHURCH,PO BOX 1710,SILVERTON,OR,97381-0365
4,MIRROR POND FREE METHODIST CHURCH,PO BOX 214,BEND,OR,97709-0214


In [50]:
# True for every old record whose exact business name also appears in the latest listing.
name_in_latest = df_old_merged['Business Name'].isin(df_latest_filtered['Business Name'])

# Partition the old records on that mask.
remaining_churches = df_old_merged.loc[name_in_latest]
churches_no_longer_exist = df_old_merged.loc[~name_in_latest]

# Old records still present in the latest listing: one row per name, saved and previewed.
remaining_churches_cleaned = remaining_churches.drop_duplicates(subset='Business Name')
remaining_churches_file = '/content/drive/MyDrive/OregonChurches/Remaining_Churches_Cleaned.csv'
remaining_churches_cleaned.to_csv(remaining_churches_file, index=False)
display(remaining_churches_cleaned.head())

# Old records absent from the latest listing: same de-duplication, saved and previewed.
churches_no_longer_exist_cleaned = churches_no_longer_exist.drop_duplicates(subset='Business Name')
no_longer_exist_file = '/content/drive/MyDrive/OregonChurches/Churches_No_Longer_Exist_Cleaned.csv'
churches_no_longer_exist_cleaned.to_csv(no_longer_exist_file, index=False)
display(churches_no_longer_exist_cleaned.head())

Unnamed: 0,Business Name,Entity Type,Nonprofit Type,Registry Date,Associated Name Type,Address,City,State,Zip Code
4,FIRST CHRISTIAN CHURCH OF SPRINGFIELD,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1942-09-24 00:00:00,PRINCIPAL PLACE OF BUSINESS,395 W CENTENNIAL BLVD,SPRINGFIELD,OR,97477
8,TWIN RIVERS CHURCH,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1950-09-08 00:00:00,PRINCIPAL PLACE OF BUSINESS,1660 MOHAWK BLVD,SPRINGFIELD,OR,97477
11,SPRINGFIELD CHURCH OF GOD,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1951-09-28 00:00:00,PRINCIPAL PLACE OF BUSINESS,1369 D ST,SPRINGFIELD,OR,97477
13,CITY OF DESTINY CHURCH,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1952-11-24 00:00:00,PRINCIPAL PLACE OF BUSINESS,2065 CENTENNIAL BLVD,SPRINGFIELD,OR,97477
16,CAMP CREEK CHURCH,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1954-03-02 00:00:00,PRINCIPAL PLACE OF BUSINESS,37535 UPPER CAMP CREEK RD,SPRINGFIELD,OR,97478


Unnamed: 0,Business Name,Entity Type,Nonprofit Type,Registry Date,Associated Name Type,Address,City,State,Zip Code
0,THURSTON CHRISTIAN CHURCH,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1892-12-01 00:00:00,PRINCIPAL PLACE OF BUSINESS,6680 THURSTON RD,SPRINGFIELD,OR,97478
1,CENTENNIAL CHRISTIAN CENTER,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1897-04-23 00:00:00,PRINCIPAL PLACE OF BUSINESS,1020 JANUS ST,SPRINGFIELD,OR,97477
2,"FIRST BAPTIST CHURCH OF SPRINGFIELD, OREGON",DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1914-11-07 00:00:00,PRINCIPAL PLACE OF BUSINESS,1175 G ST,SPRINGFIELD,OR,97477
3,NEW LIFE CHURCH FIRST ASSEMBLY OF GOD SPRINGFIELD,DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1940-12-30 00:00:00,PRINCIPAL PLACE OF BUSINESS,2080 N 19TH ST,SPRINGFIELD,OR,97477
5,"TRINITY BAPTIST CHURCH OF SPRINGFIELD, OREGON",DOMESTIC NONPROFIT CORPORATION,RELIGIOUS WITH MEMBERS,1945-03-22 00:00:00,PRINCIPAL PLACE OF BUSINESS,1162 B ST,SPRINGFIELD,OR,97477


In [51]:
# Reload the two de-duplicated sets from disk so this cell does not depend on
# in-memory frames from earlier cells.
remaining_churches = pd.read_csv('/content/drive/MyDrive/OregonChurches/Remaining_Churches_Cleaned.csv')
churches_no_longer_exist = pd.read_csv('/content/drive/MyDrive/OregonChurches/Churches_No_Longer_Exist_Cleaned.csv')

# Target label: 1 = name still present in the latest listing, 0 = absent from it.
remaining_churches['Longevity'] = 1
churches_no_longer_exist['Longevity'] = 0

df = pd.concat([remaining_churches, churches_no_longer_exist], ignore_index=True)

# Encode each categorical column with its OWN encoder. Refitting one shared
# encoder on 'State' would discard the 'City' mapping, leaving the encoded
# cities impossible to decode later (e.g. via inverse_transform).
label_encoders = {}
for column in ('City', 'State'):
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])
# Backward compatibility: `label_encoder` ends up fitted on 'State', as before.
label_encoder = label_encoders['State']

# NOTE(review): 'Zip Code' is passed through unencoded — assumes it parses as
# numeric in both CSVs; confirm before relying on its importance score.
X = df[['City', 'State', 'Zip Code']]
y = df['Longevity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Reference point: accuracy of always predicting the most frequent training
# label. Without it the accuracy figure above cannot be interpreted.
majority_label = y_train.mode().iloc[0]
baseline_accuracy = (y_test == majority_label).mean()
print(f'Majority-class baseline: {baseline_accuracy * 100:.2f}%')

# Impurity-based importance of each input column, in the order of X's columns.
importances = model.feature_importances_
feature_names = X.columns
for name, importance in zip(feature_names, importances):
    print(f'{name}: {importance:.2f}')

Model Accuracy: 86.70%
City: 0.10
State: 0.01
Zip Code: 0.89
