### Analysis and tabulation of the frame

In [11]:
# Import required libraries
import pandas as pd

## Loading the dataset
companies = pd.read_csv('companies_with_urbanization.csv')

# Drop the specified columns
columns_to_drop = ['smod_year', 'smod_code', 'geocoded']
companies = companies.drop(columns=[col for col in columns_to_drop if col in companies.columns])

# Rename 'smod_label' column to 'urbanization'
companies = companies.rename(columns={'smod_label': 'urbanization'})

# Display the first few rows to verify
companies.head()

Unnamed: 0,name,address,city,phone,source_url,latitude,longitude,province,district,urbanization
0,Caroline Garments P/L t/a Caroline Safety,"26 Bon Accord Rd, Westondale, Bulawayo, Zimbabwe",Bulawayo,+263 29 22776452003,https://www.zimbabweyp.com/company/88429/Carol...,-20.155479,28.55079,Bulawayo,Bulawayo,Urban centre
1,Pest Portal Zimbabwe (Pvt) Ltd,"Office 11, 3D Building Strathaven Building, Av...",Harare,07725933442007,https://www.zimbabweyp.com/company/104158/Pest...,-17.792565,31.033528,Harare,Harare,Urban centre
2,Busy Bee Transcribing,"13 Ascot Road, Avondale West, Harare , Zimbabwe",Harare,0242-3089692012,https://www.zimbabweyp.com/company/106087/Busy...,-17.783362,31.02496,Harare,Harare,Urban centre
3,Athol Evans Hospita Home,"P O Box Cr 70 Cranb Cranborne, Harare, Zimbabwe",Harare,570 8875,https://www.zimbabweyp.com/company/14492/Athol...,-17.847295,31.072463,Harare,Harare,Urban centre
4,MEGA MARKET,"Target Area, Mutare, Zimbabwe",Mutare,263-20-61517,https://www.zimbabweyp.com/company/4595/MEGA_M...,-18.972991,32.671469,Manicaland,Mutare,Urban centre


### Descriptive Statistics

In [12]:
# Create province by urbanization crosstab
province_urban = pd.crosstab(index=companies['province'],
                            columns=companies['urbanization'],
                            margins=True, margins_name="Total")

# Create district by urbanization crosstab
district_urban = pd.crosstab(index=companies['district'],
                            columns=companies['urbanization'],
                            margins=True, margins_name="Total")

# Sort districts by province for better readability
district_urban = district_urban.reindex(companies.drop_duplicates('district')
                                      .sort_values('province')['district']
                                      .tolist() + ['Total'])

# Save to Excel with multiple sheets
with pd.ExcelWriter('urbanization_analysis.xlsx', engine='openpyxl') as writer:
    province_urban.to_excel(writer, sheet_name='Province by Urbanization')
    district_urban.to_excel(writer, sheet_name='District by Urbanization')

    # Auto-adjust column widths
    for sheet_name in writer.sheets:
        worksheet = writer.sheets[sheet_name]
        # Get the max length of content in each column
        for col in worksheet.columns:
            max_length = 0
            column = col[0].column_letter  # Get the column name
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(str(cell.value))
                except:
                    pass
            adjusted_width = (max_length + 2) * 1.2
            worksheet.column_dimensions[column].width = min(adjusted_width, 30)

print("Analysis saved to 'urbanization_analysis.xlsx'")
province_urban, district_urban  # Display the dataframes

Analysis saved to 'urbanization_analysis.xlsx'


(urbanization         Dense urban cluster  Low density rural  Rural cluster  \
 province                                                                     
 Bulawayo                              35                 46             23   
 Harare                                48                 93              0   
 Manicaland                            32                 42             12   
 Mashonaland Central                   32                 32              1   
 Mashonaland East                       3                 65             15   
 Mashonaland West                      26                104              9   
 Masvingo                               9                 32             17   
 Matabeleland North                   213                 37             13   
 Matabeleland South                    23                 16             12   
 Midlands                               6                 45              6   
 Total                                427           