# Kje naj danes kolesarim?
## Geografska analiza rekreativnega kolesarjenja v Sloveniji
---
Urejanje podatkov v neki srednji obliki iz strave

In [1]:
import json

In [32]:
# Trenutni podatki
with open("data/intermediate/detailed_segments.json") as f:
    segments = json.load(f)

In [33]:
def atributi(segments):
    """Vrne seznam atributov segmentov."""
    return list(segments[0].keys())

# Izpis atributov segmentov
print("Atributi segmentov:")
count = 0
for key in atributi(segments):
    count += 1
    print(key)

print(f"Skupaj atributov: {count}")

Atributi segmentov:
id
resource_state
name
activity_type
distance
average_grade
maximum_grade
elevation_high
elevation_low
start_latlng
end_latlng
elevation_profile
elevation_profiles
climb_category
city
state
country
private
hazardous
starred
created_at
updated_at
total_elevation_gain
map
effort_count
athlete_count
star_count
athlete_segment_stats
xoms
local_legend
Skupaj atributov: 30


Odstranjevanje nepotrebnih atributov: 
```
    attributes_to_remove = [
        "resource_state", "city", "state", "created_at", "updated_at",
        "private", "athlete_segment_stats", "hazardous", "starred",
        "star_count", "local_legend", "athlete_count", "climb_category"
    ]
```

In [34]:
def attributes_cleaning(segments):
    """Čisti podatke segmentov."""
    
    # List of attributes to remove
    attributes_to_remove = [
        "resource_state", "city", "state", "created_at", "updated_at",
        "private", "athlete_segment_stats", "hazardous", "starred",
        "star_count", "local_legend", "athlete_count", "climb_category"
    ]
    
    # Removing unnecessary attributes
    for segment in segments:
        for attribute in attributes_to_remove:
            if attribute in segment:
                del segment[attribute]
        xoms = segment['xoms']
        del segment['xoms']
        segment['kom'] = xoms['kom']
        segment['qom'] = xoms['qom']


    with open("data/intermediate/cleaned_segments1.json", "w") as f:
        json.dump(segments, f, indent=4)

In [35]:
attributes_cleaning(segments)
segments_cleaned = segments

In [36]:
print(segments_cleaned[0].keys())
print(len(segments_cleaned))

dict_keys(['id', 'name', 'activity_type', 'distance', 'average_grade', 'maximum_grade', 'elevation_high', 'elevation_low', 'start_latlng', 'end_latlng', 'elevation_profile', 'elevation_profiles', 'country', 'total_elevation_gain', 'map', 'effort_count', 'kom', 'qom'])
1526


Dodajanje lastne kategorije klancem, stravino kategoriziranje klancev je za slovenske razmere lahko rahlo neugodno, saj imamo v sloveniji le nekaj klancev najvišje kategorije (zgledujejo se po alpskih klancih) zato je tu alstno kategoriziranje, klanci ki niso ravni so razdeljeni v 5 približno enakih skupin, glede na izračunan climb score. 

In [None]:
import pandas as pd

def custom_climb_category():
    """Dodeli kategorijo vzpona segmentom."""

    df = pd.read_json("data/intermediate/cleaned_segments1.json")

    # Izračunaj climb_score
    df["climb_score"] = (df["total_elevation_gain"] * df["average_grade"]) / 100

    df["is_flat"] = df["average_grade"].abs() < 1.5

    df_climbs = df[~df["is_flat"]].copy()

    climb_categories = pd.qcut(df_climbs["climb_score"], 5, labels=[1, 2, 3, 4, 5])
    climb_categories = climb_categories.astype(int)

    df.loc[df_climbs.index, "custom_climb_category"] = climb_categories
    df.loc[df["is_flat"], "custom_climb_category"] = 0

    # Shrani, če želiš
    df.to_json("data/intermediate/segments_climb_category.json", orient="records")


In [39]:
custom_climb_category()

with open("data/intermediate/segments_climb_category.json") as f:
    segments_climb_category = json.load(f)

print(len(segments_climb_category))
print(segments_climb_category[0].keys())

with open("data/intermediate/segments_climb_category.json", "w") as f:
    json.dump(segments_climb_category, f, indent=4)

1526
dict_keys(['id', 'name', 'activity_type', 'distance', 'average_grade', 'maximum_grade', 'elevation_high', 'elevation_low', 'start_latlng', 'end_latlng', 'elevation_profile', 'elevation_profiles', 'country', 'total_elevation_gain', 'map', 'effort_count', 'kom', 'qom', 'climb_score', 'is_flat', 'custom_climb_category'])


In [40]:
categories_count = [0] * 6  # 0, 1, 2, 3, 4, 5
for segment in segments_climb_category:
    categories_count[int(segment['custom_climb_category'])] += 1

print("Število segmentov po kategorijah vzponov:")
for i in range(6):
    print(f"Kategorija {i}: {categories_count[i]}")


Število segmentov po kategorijah vzponov:
Kategorija 0: 295
Kategorija 1: 247
Kategorija 2: 246
Kategorija 3: 246
Kategorija 4: 246
Kategorija 5: 246


Še zadnji atributi, določitev regije segmenta in dodaten atribut segmentom ki imajo maximalni naklon več kot 10%.

In [41]:
from shapely.geometry import shape

with open("obcine.geojson") as f:
    data = json.load(f)

regions_geom = []
for region in data['features']:
    region_geom = {
        'name': region['properties']['name'],
        'geometry': shape(region['geometry'])
    }
    regions_geom.append(region_geom)

print(f"Število občin: {len(regions_geom)}")

Število občin: 212


In [46]:
from shapely.geometry import Point

def find_region(segments):
    """Najdi regijo za vsak segment."""
    for segment in segments:
        
        start = Point(segment['start_latlng'][1], segment['start_latlng'][0])
        end = Point(segment['end_latlng'][1], segment['end_latlng'][0])
        
        segment['region'] = []
        for region in regions_geom:
            if region['geometry'].contains(start) or region['geometry'].contains(end):
                segment['region'].append(region['name'])
    return segments

In [47]:
with open("data/intermediate/segments_climb_category.json") as f:
    segments_climb_category = json.load(f)

segments_with_region = find_region(segments_climb_category)

with open("data/clean/segments.json", "w") as f:
    json.dump(segments_with_region, f, indent=4)
    

In [49]:
print(segments_cleaned[0].keys())
print(len(segments_cleaned))

dict_keys(['id', 'name', 'activity_type', 'distance', 'average_grade', 'maximum_grade', 'elevation_high', 'elevation_low', 'start_latlng', 'end_latlng', 'elevation_profile', 'elevation_profiles', 'country', 'total_elevation_gain', 'map', 'effort_count', 'kom', 'qom'])
1526


"čisti" podatki so shranjeni v "data/clean/segments.json". Mogoče bi potem videli da rabim oše kakšen atribut ali nevem ...

```Atributi : 'id', 'name', 'activity_type', 'distance', 'average_grade', 'maximum_grade', 'elevation_high', 'elevation_low', 'start_latlng', 'end_latlng', 'elevation_profile', 'elevation_profiles', 'country', 'total_elevation_gain', 'map', 'effort_count', 'kom', 'qom' ```