In [1]:
import pandas as pd
import random
import numpy as np

In [2]:
# Seed both random number generators used in this notebook (the stdlib
# `random` module and NumPy's legacy global generator) with the same value
# so that a fresh Restart & Run All reproduces the same draws.
SEED = 60
random.seed(SEED)
np.random.seed(SEED)

In [6]:
# Real data: one row per service town as
# (town name, distance in km, travel time in hours).
# Keeping the three values of a town on one line avoids misaligning the
# parallel lists when a town is added or removed.
town_records = [
    ('Gbi - Kledzo', 4.9, 0.267),
    ('Gbi - Wegbe', 6.2, 0.333),
    ('Godenu', 19.2, 0.5),
    ('Gbi - Avege', 10.2, 0.467),
    ('Ve-Dator', 14.3, 0.6),
    ('Ve- Kobenu', 13.7, 0.583),
    ('Golokwati', 21.3, 0.5),
    ('Afadzo South', 38.4, 1.017),
    ('Ve -Gbodome', 25.4, 0.667),
    ('Wuinta', 30.7, 0.783),
    ('Logba', 38.3, 1),
    ('Fume', 38.3, 0.917),
    # NOTE(review): distance and travel time are both 0.65 for Fodome, which
    # implies roughly 1 km/h -- possibly a transcription slip; confirm
    # against the source data.
    ('Fodome', 0.65, 0.65),
    ('Liati', 20, 0.55),
    ('Zimugaziwo snake Village', 16.5, 0.467),
    ('Wli', 22.8, 0.483),
    ('Leklebi', 23.5, 0.633),
    ('Gbledi', 1.3, 0.05),
    ('Agome yo', 15.9, 0.517),
    ('Santrokofi', 7.5, 0.333),
    ('Akpafu', 11.9, 0.417),
    ('Lolobi', 10.1, 0.35),
    ('Likpe', 17.2, 0.467),
]

# Transpose the rows into the column-oriented dict of lists used downstream.
town_names, town_distances, town_travel_times = (
    list(column) for column in zip(*town_records)
)
towns_data = {
    'Service_town': town_names,
    'distance_in_km': town_distances,
    'travel_time_in_hours': town_travel_times,
}

In [7]:
# Turn the column-oriented town dictionary into a DataFrame (one row per
# town), then display the largest distance as a quick sanity check of the
# range that the zone thresholds have to cover.
df_towns = pd.DataFrame(data=towns_data)
max_distance_km = df_towns["distance_in_km"].max()
max_distance_km

38.4

In [10]:
# Classify each town by distance: up to and including 20 km is 'Near',
# anything beyond that is 'Far'.
is_near = df_towns['distance_in_km'].le(20)
df_towns['zone'] = is_near.map({True: 'Near', False: 'Far'})

# Sanity check: show the labelled table and how many towns fall in each zone.
zone_counts = df_towns['zone'].value_counts()
print("TOWN DATA WITH ZONES")
print(df_towns)
print(f"\nNear zone towns: {zone_counts.get('Near', 0)}")
print(f"Far zone towns: {zone_counts.get('Far', 0)}")

TOWN DATA WITH ZONES
                Service_town  distance_in_km  travel_time_in_hours  zone
0               Gbi - Kledzo            4.90                 0.267  Near
1                Gbi - Wegbe            6.20                 0.333  Near
2                     Godenu           19.20                 0.500  Near
3                Gbi - Avege           10.20                 0.467  Near
4                   Ve-Dator           14.30                 0.600  Near
5                 Ve- Kobenu           13.70                 0.583  Near
6                  Golokwati           21.30                 0.500   Far
7               Afadzo South           38.40                 1.017   Far
8                Ve -Gbodome           25.40                 0.667   Far
9                     Wuinta           30.70                 0.783   Far
10                     Logba           38.30                 1.000   Far
11                      Fume           38.30                 0.917   Far
12                    Fodome  

In [11]:
# Fault types mapped to their repair time in hours
fault_types = {
    'Transformer failure': 3.0,
    'Line break': 2.0,
    'Pole replacement': 4.0,
    'Meter issue': 1.0
}

# Sampling weights, listed in the same order as the keys of `fault_types`:
# 40% transformer, 45% line break, 10% pole, 5% meter
fault_type_weights = [0.40, 0.45, 0.10, 0.05]

# Priority levels and their sampling weights (same order)
priority_levels = ['High', 'Normal']
priority_weights = [0.35, 0.65]  # 35% high priority, 65% normal

# The weights are matched to their labels purely by position, so fail loudly
# if a label is added or removed without updating the matching weight list.
assert len(fault_type_weights) == len(fault_types), \
    "fault_type_weights must have one entry per fault type"
assert len(priority_weights) == len(priority_levels), \
    "priority_weights must have one entry per priority level"

# Number of faults to generate
num_faults = 20

# Re-seed the stdlib generator here so that this cell is idempotent:
# re-running it on its own yields the same faults instead of continuing the
# global random stream. The value matches the notebook-level seed, so a fresh
# Restart & Run All produces the same draws as seeding only at the top
# (no cell between the seed cell and this one is seen to use `random`).
random.seed(60)

# Randomly select towns (with replacement, so a town can have multiple faults)
selected_towns = random.choices(df_towns['Service_town'].tolist(), k=num_faults)

# Randomly assign fault types according to fault_type_weights
selected_fault_types = random.choices(
    list(fault_types.keys()),
    weights=fault_type_weights,
    k=num_faults
)

# Randomly assign priorities according to priority_weights
selected_priorities = random.choices(
    priority_levels,
    weights=priority_weights,
    k=num_faults
)

In [13]:
# Create fault dataset: one record per simulated fault, combining the sampled
# town, fault type and priority with the town's zone, distance and travel time.

# Build the town lookup once so each fault is resolved with a dictionary
# access rather than a boolean-mask scan of df_towns per fault.
# drop_duplicates keeps the first row per town name, i.e. the same row a
# "first match" lookup would return.
town_lookup = (
    df_towns
    .drop_duplicates(subset='Service_town')
    .set_index('Service_town')
    .to_dict(orient='index')
)

faults_data = []

for i in range(num_faults):
    town = selected_towns[i]
    fault_type = selected_fault_types[i]
    town_info = town_lookup[town]

    faults_data.append({
        'Fault_ID': f'F{i+1}',
        'Town': town,
        'Zone': town_info['zone'],
        'Distance_km': town_info['distance_in_km'],
        'Travel_time_hours': round(town_info['travel_time_in_hours'], 3),
        'Fault_type': fault_type,
        # Repair time is fixed per fault type
        'Repair_time_hours': fault_types[fault_type],
        'Priority': selected_priorities[i]
    })

# Create DataFrame
df_faults = pd.DataFrame(faults_data)

# Report the actual number of generated faults in the header instead of a
# hard-coded 20, so the title stays correct if num_faults changes.
print(f"GENERATED FAULT DATASET ({len(df_faults)} FAULTS)")
print(df_faults.to_string(index=False))

GENERATED FAULT DATASET (20 FAULTS)
Fault_ID                     Town Zone  Distance_km  Travel_time_hours          Fault_type  Repair_time_hours Priority
      F1             Afadzo South  Far         38.4              1.017          Line break                2.0     High
      F2                    Liati Near         20.0              0.550          Line break                2.0   Normal
      F3                Golokwati  Far         21.3              0.500          Line break                2.0   Normal
      F4                 Agome yo Near         15.9              0.517          Line break                2.0   Normal
      F5                    Logba  Far         38.3              1.000 Transformer failure                3.0   Normal
      F6             Afadzo South  Far         38.4              1.017 Transformer failure                3.0     High
      F7                   Akpafu Near         11.9              0.417 Transformer failure                3.0   Normal
      F8    

In [15]:
# Summary statistics: total number of faults, a count breakdown per
# categorical column, and the mean travel time within each zone.
print("SUMMARY STATISTICS")
print(f"Total faults: {len(df_faults)}")

# (label shown in the heading, column to count)
breakdowns = (
    ("Zone", "Zone"),
    ("Type", "Fault_type"),
    ("Priority", "Priority"),
)
for label, column in breakdowns:
    print(f"\nFaults by {label}:")
    print(df_faults[column].value_counts())

near_travel_mean = df_faults.loc[df_faults['Zone'] == 'Near', 'Travel_time_hours'].mean()
far_travel_mean = df_faults.loc[df_faults['Zone'] == 'Far', 'Travel_time_hours'].mean()
print(f"\nAverage travel time (Near zone): {near_travel_mean:.3f} hours")
print(f"Average travel time (Far zone): {far_travel_mean:.3f} hours")

SUMMARY STATISTICS
Total faults: 20

Faults by Zone:
Zone
Near    12
Far      8
Name: count, dtype: int64

Faults by Type:
Fault_type
Transformer failure    10
Line break              7
Pole replacement        3
Name: count, dtype: int64

Faults by Priority:
Priority
Normal    14
High       6
Name: count, dtype: int64

Average travel time (Near zone): 0.479 hours
Average travel time (Far zone): 0.773 hours
