In [23]:
from pathlib import Path
import os
from streettransformer.alignment.validate import (
    validation_table, 
    validate_classifier,
    validate_identifier,
    validate_summarizer,
    validate_dater,
    bulk_validate
)

# Summarizers

In [24]:
import pandas as pd
results = {}

results['summarizer'] = pd.DataFrame({
    ('image-to-document', 'gpt-4o'): validate_summarizer('summarizer-image-4o.csv', 'summarizer-document-4o.csv'),
    ('sidebyside-to-document', 'gpt-4o'): validate_summarizer('summarizer-sidebyside-4o.csv', 'summarizer-document-4o.csv'),
    ('image-to-sidebyside', 'gpt-4o'): validate_summarizer('summarizer-image-4o.csv', 'summarizer-sidebyside-4o.csv')
}).T.reset_index(names=['format', 'model']).to

# Classifiers

In [25]:
INPUTS = {
    ('image', 'gpt-4o'): 'identifier-image-4o.csv',
    ('sidebyside', 'gpt-4o'): 'identifier-sidebyside-4o.csv',
    ('image', 'gpt-5'): 'identifier-image-5.csv',
    ('sidebyside', 'gpt-5'): 'identifier-sidebyside-5.csv'
}

results['classifier'] = bulk_validate(
    inputs=INPUTS,
    validation_function=validate_classifier,
    groundtruth_path='change_classifier.parquet',
    true_class_col='class',
    pred_class_col= 'change_detected'
)

116 NA records removed
     location_id year_start year_end pred_class  true_class
0            604       2006     2014      False       False
1           3047       2006     2018      False       False
2           3893       2006     2014      False       False
3           3893       2012     2018       True       False
4           4317       2006     2024       True        True
..           ...        ...      ...        ...         ...
991      9065997       2012     2014       True       False
992      9066199       2014     2024      False       False
993      9066221       2006     2024       True        True
995      9066224       2014     2024       True        True
996      9066225       2006     2012       True       False

[881 rows x 5 columns]
74 NA records removed
     location_id year_start year_end pred_class  true_class
0            604       2006     2014      False       False
1           3047       2006     2018       True       False
2           3893       2006    

In [15]:
for _, i in INPUTS.items():
    classifier = i.replace('identifier', 'classifier')
    validation_table(
        i,
        groundtruth_path='change_classifier.parquet',
        true_class_col='class',
        pred_class_col= 'change_detected'
    ).to_csv(f'../data/exports/data_help/validation_tables/{classifier}', index=False)

    validation_table(
        i,
        groundtruth_path='change_identifier.parquet',
        true_class_col='correct_tags',
        pred_class_col= 'features'
    ).to_csv(f'../data/exports/data_help/validation_tables/{i}', index=False)


116 NA records removed
133 NA records removed
74 NA records removed
91 NA records removed
887 NA records removed
871 NA records removed
887 NA records removed
871 NA records removed


# Identifiers

In [26]:
results['identifier'] = bulk_validate(
    inputs=INPUTS,
    validation_function=validate_identifier,
    groundtruth_path='change_identifier.parquet',
    true_class_col='correct_tags',
    pred_class_col= 'features'
)

133 NA records removed
     location_id year_start year_end  \
0            604       2006     2014   
1           3047       2006     2018   
2           3893       2006     2014   
3           3893       2012     2018   
4           4317       2006     2024   
..           ...        ...      ...   
990      9064661       2006     2018   
991      9065997       2012     2014   
993      9066221       2006     2024   
995      9066224       2014     2024   
996      9066225       2006     2012   

                                            pred_class  \
0                                                   []   
1                                                   []   
2                                                   []   
3                 [Curb Extensions, Sidewalk Redesign]   
4    [Curb Extensions, New or Expanded Median/Pedes...   
..                                                 ...   
990  [Lane Removal or Road Narrowing, Sidewalk Rede...   
991  [New or Expanded Median/Ped

# Daters

In [None]:
from streettransformer.alignment.validate import DaterKey, DaterOutput
from dataclasses import fields

val_df_sbs_4o = validation_table(
    'dater-sidebyside-4o.csv',
    'dater.parquet',
    'proj_year',
    'change_locations',
    DaterKey, 
    DaterOutput
)

val_df_img_4o = validation_table(
    'dater-image-4o.csv',
    'dater.parquet',
    'proj_year',
    'change_locations',
    DaterKey, 
    DaterOutput
)


val_df_sbs_5 = validation_table(
    'dater-sidebyside-5.csv',
    'dater.parquet',
    'proj_year',
    'change_locations',
    DaterKey, 
    DaterOutput
)

val_df_img_5 = validation_table(
    'dater-image-5.csv',
    'dater.parquet',
    'proj_year',
    'change_locations',
    DaterKey, 
    DaterOutput
)

# validate_dater(val_df,
#                'proj_year','change_locations')

INPUTS = {
    ('image', 'gpt-4o'): 'dater-image-4o.csv',
#    ('sidebyside', 'gpt-4o'): 'dater-sidebyside-4o.csv'
}

# results['dater'] = bulk_validate(
# # bulk_validate(
#     inputs=INPUTS,
#     validation_function=validate_identifier,
#     groundtruth_path='dater.parquet',
#     true_class_col='proj_year',
#     pred_class_col= 'change_locations',
#     key_obj=DaterKey,
#     results_obj=DaterOutput
# )

val_df_img_4o.to_csv('../data/exports/metrics/validations/dater_image_4o.csv')
val_df_sbs_4o.to_csv('../data/exports/metrics/validations/dater_sbs_4o.csv')
val_df_img_5.to_csv('../data/exports/metrics/validations/dater_image_5.csv')
val_df_sbs_5.to_csv('../data/exports/metrics/validations/dater_sb_5.csv')



1404 NA records removed
1262 NA records removed
1624 NA records removed
1604 NA records removed


In [30]:
results['summarizer'].to_csv('../data/exports/metrics/summarizer.csv')

In [37]:
results['classifier'].to_csv('../data/exports/metrics/classifier.csv')

In [38]:
results['identifier'].to_csv('../data/exports/metrics/identifier.csv')
# validation_table(
#     'identifier-image-4o.csv',
#     'change_identifier.parquet',
#     true_class_col='correct_tags',
#     pred_class_col= 'features'
# )

In [None]:
results['dater']
#val_df_img

KeyError: 'dater'

In [None]:
from streettransformer.llms.queries import QUERIES
print(QUERIES['image_change_dater'].text())


        Role: You are a Transportation Engineer employed by the city tasked with analyzing changes in intersection streetscape over time.

        Goal: Your goal is to look at an ordered set of images of the same location across different dates, and identify when the significant infrastrcutural change occured, if it did at all. Limit this analysis to only capital reconstruction features including: 'Curb Extensions', 'New or Expanded Median/Pedestrian Refuge Island', 'Bike Enhancement', 'Median Tip Extension', 'Raised Median', 'Lane Removal or Road Narrowing', 'Bus Bulb', 'Shared Street', 'Sidewalk Redesign')

        Respond: Please respond in a well formatted json exclusively with 2 tags:
	- change_detected: A boolean value if you detect significant change with regards to the above categories. Respond exclusively with True or False
	- change_locations: A list of image IDs that you believe show a significant change from the image before. Please only respond with ['2006', '2012', '201