This script extracts the photo-taken times of the photos I used for dominant color extraction.

In [None]:
import os
import json

    
def extract_json_metadata(image_filename, json_folder, search_key=None):
    """
    Look up the JSON sidecar file of an image inside the "Photos*" subfolders of a folder.

    The sidecar is expected to be named ``<image_filename>.json`` and to sit directly
    inside an entry of ``json_folder`` whose name starts with "Photos".

    Args:
        image_filename: File name of the image (including its extension).
        json_folder: Folder whose "Photos*" entries are searched.
        search_key: Optional top-level key to read from the sidecar JSON.

    Returns:
        The value stored under ``search_key`` in the first sidecar that contains it.
        Otherwise (no ``search_key`` given, key not present, or file not decodable)
        the list of sidecar paths that were found, which may be empty.
    """
    json_name = f"{image_filename}.json"
    json_files = []

    # os.listdir() returns entries in arbitrary order; sorting makes the choice
    # of the "first" matching sidecar reproducible between runs.
    for folder_name in sorted(os.listdir(json_folder)):
        if not folder_name.startswith("Photos"):
            continue

        file_path = os.path.join(json_folder, folder_name, json_name)
        if not os.path.exists(file_path):
            continue

        if search_key:
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    file_content = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best effort: report the broken file and keep searching.
                print(f"Error decoding JSON in file: {file_path}")
            else:
                # Only JSON objects can hold the key; a top-level list or scalar
                # is treated as "key not found" instead of crashing.
                if isinstance(file_content, dict) and search_key in file_content:
                    print(f"Found key '{search_key}' in {file_path}")
                    return file_content[search_key]

        json_files.append(file_path)

    return json_files

    
def process_images_in_folder(image_folder, json_folder):
    """
    Build one record per image file found directly inside ``image_folder``.

    Each record is a dict with the image's full path ("image_path") and whatever
    ``extract_json_metadata`` returns for the "photoTakenTime" key ("photo_taken_time").
    Files are recognised as images by their extension (.png, .jpg, .jpeg, .bmp),
    compared case-insensitively.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    image_names = [
        name for name in os.listdir(image_folder)
        if name.lower().endswith(image_suffixes)
    ]
    return [
        {
            "image_path": os.path.join(image_folder, name),
            "photo_taken_time": extract_json_metadata(name, json_folder, "photoTakenTime"),
        }
        for name in image_names
    ]

def process_photos_in_year_folder(base_folder, json_folder):
    """
    Collect photoTakenTime records for every month folder of every year folder.

    Year folders are the directories inside ``base_folder`` whose names start with
    "Photos_20"; every sub-directory of a year folder is treated as a month folder.
    Returns a nested dict: year folder name -> month folder name -> list of records
    produced by ``process_images_in_folder``.
    """
    year_data = {}
    for year_name in os.listdir(base_folder):
        year_dir = os.path.join(base_folder, year_name)
        # Skip anything that is not a "Photos_20**" directory
        if not (year_name.startswith("Photos_20") and os.path.isdir(year_dir)):
            continue

        month_dirs = (
            (month_name, os.path.join(year_dir, month_name))
            for month_name in os.listdir(year_dir)
        )
        year_data[year_name] = {
            month_name: process_images_in_folder(month_dir, json_folder)
            for month_name, month_dir in month_dirs
            if os.path.isdir(month_dir)  # plain files inside a year folder are ignored
        }
    return year_data

# Driver for this cell: scan the photo folders, look up each image's
# photoTakenTime in the JSON folder, and dump the result to a JSON file.

# Base folder containing the "Photos_20**" year folders (each with month sub-folders)
base_folder = r"C:\Users\Lenovo\Desktop"  # Adjust path to your actual base folder

# Separate folder where JSON files are stored (its "Photos*" sub-folders are searched)
json_folder = r"C:\Users\Lenovo\Desktop\google_takeout\takeout-20250108T011611Z-001"  # Adjust path to your actual JSON folder

# The key to look for inside the JSON files.
# NOTE: this variable is not passed to the call below; process_images_in_folder
# uses the literal "photoTakenTime" instead.
search_key = "photoTakenTime"

# Build the nested result: year folder -> month folder -> list of
# {"image_path", "photo_taken_time"} records
photo_data = process_photos_in_year_folder(base_folder, json_folder)

# Save the results to a JSON file in the current working directory
output_file = "photo_taken_times.json"
with open(output_file, "w") as f:
    json.dump(photo_data, f, indent=4)

print(f"Photo taken times saved to {output_file}.")


Photo-taken times for all years, visualized as a scatter plot, along with the top 5 hours of the day in which I took the most photos.

In [None]:
import json
import altair as alt
import pandas as pd
from datetime import datetime, timezone, timedelta

# Switch Altair's data transformer to "vegafusion"
# (presumably so the chart can handle a large number of rows — confirm)
alt.data_transformers.enable("vegafusion")

# Load the nested year -> month -> records JSON file into `data`
with open("photo_taken_times.json", "r") as file:
    data = json.load(file)

# Extract the photo-taken timestamps (with their year and month labels) from the JSON data
def extract_times(json_data):
    """
    Flatten the nested year -> month -> entries mapping into three parallel lists.

    An entry contributes only when its "photo_taken_time" value is a dict that
    contains a "timestamp" key; entries where the metadata is missing, null, a
    list, or any other non-dict value are skipped.

    Args:
        json_data: Dict mapping year label -> dict mapping month label -> list of
            entry dicts.

    Returns:
        A tuple ``(timestamps, years, months)`` of equally long lists: the
        timestamps converted to ``int``, and the year and month label each
        timestamp was found under.
    """
    timestamp = []
    years = []
    months = []
    for year, months_data in json_data.items():
        for month, entries in months_data.items():
            for entry in entries:
                taken_time = entry.get("photo_taken_time")
                # The metadata lookup may have stored something other than the
                # expected dict (e.g. a list of file paths), so check the type
                # before indexing into it.
                if not isinstance(taken_time, dict) or "timestamp" not in taken_time:
                    continue
                timestamp.append(int(taken_time["timestamp"]))
                years.append(year)
                months.append(month)
    return timestamp, years, months

# Parallel lists: Unix timestamps plus the year/month label each one belongs to
timestamps, years, months = extract_times(data)

time_stamp = []  # "HH:MM:SS" strings shown in the tooltip
t_hour = []      # time of day in fractional hours, used as the x position
hours = []       # whole hour (0-23) of each photo, used for the frequency count
for t in timestamps:
    # Interpret the Unix timestamp at a fixed UTC+3 offset
    t = datetime.fromtimestamp(t, timezone(timedelta(hours=3)))

    # Extract hour, minute, and second
    hour = t.hour
    minute = t.minute
    second = t.second
    hours.append(hour)
    # Express the time of day as fractional hours (13:30:00 -> 13.5) so that
    # points are placed proportionally on the 0-24 "Hour of the Day" axis.
    combined_time = hour + minute / 60 + second / 3600
    t_hour.append(combined_time)
    time_stamp.append(f"{hour:02d}:{minute:02d}:{second:02d}")


from collections import Counter

# Report the five hours of the day with the most photos
hour_counts = Counter(hours)
top_5_common_hours = hour_counts.most_common(5)
print("Top 5 most common hours:")
for hour, count in top_5_common_hours:
    print(f"{hour}:00 with {count} occurrences.")

# Create a DataFrame for Altair
df = pd.DataFrame({
    'Timestamp': t_hour,
    'Year': years,
    'Month': months,
    'time': time_stamp,
})

# One color per year label, assigned in order of first appearance
year_colors_range = ['#473892', '#117733', '#49BBA8', '#88CCEE', '#DDCC77', '#CC6677', '#AA4499', '#882255', '#44AA53']
year_options = list(df['Year'].unique())

# Drop-down that filters the chart to a single year ("All" = no filter)
year_selection = alt.selection_point(
    fields=['Year'],
    name='Select',
    bind=alt.binding_select(options=[None] + year_options, labels=['All'] + year_options)
)
print(df['Year'])
chart = alt.Chart(df).mark_point().encode(
    x=alt.X('Timestamp:Q', scale=alt.Scale(domain=[0, 24]), title='Hour of the Day'),
    y=alt.Y('Month:N', title='Month'),
    color=alt.Color('Year:N', title='Year').scale(domain=year_options, range=year_colors_range),
    tooltip=['Month:N', 'Year:N', 'time:N']
).add_params(
    year_selection
).transform_filter(
    year_selection
).properties(
    width=800,
    height=800,
    title="Photo Times by Hour"
)

# configure_*() returns a new chart object, so keep the configured chart and
# use it for both display and export; otherwise the saved HTML would lose the
# axis/view settings.
configured_chart = chart.configure_axis(
    labelAngle=0,  # Keep axis labels horizontal
    labelFontSize=12,
    titleFontSize=14
).configure_view(
    stroke=None  # Remove border
)
configured_chart.show()
configured_chart.save("photo_taken_times_months.html")


Number of photos taken in each year, visualized as a scatter chart.

In [None]:

import altair as alt
import pandas as pd


# Read the semicolon-delimited table of photo counts once
# (one row per month, one column per year).
df = pd.read_csv("dsa.csv", delimiter=";")

# The first column has no header in the CSV, so pandas names it "Unnamed: 0"
df.rename(columns={"Unnamed: 0": "Month"}, inplace=True)

# Reshape the DataFrame to a long format: one row per (Month, Year, Count)
df_long = df.melt(id_vars=['Month'], var_name='Year', value_name='Count')

# Fixed color per month label
month_colors = {
    'Jan': '#1f77b4',
    'Feb': '#ff7f0e',
    'March': '#2ca02c',
    'April': '#d62728',
    'May': '#9467bd',
    'June': '#8c564b',
    'July': '#e377c2',
    'Aug': '#7f7f7f',
    'Sep': '#bcbd22',
    'Oct': '#17becf',
    'Nov': '#e7969c',
    'Dec': '#9edae5'
}

month_options = list(df['Month'].unique())

# Drop-down that filters the chart to a single month ("All" = no filter)
month_selection = alt.selection_point(
    fields=['Month'],
    name='Select',
    bind=alt.binding_select(options=[None] + month_options, labels=['All'] + month_options)
)

# Create the scatter plot
scatter_chart = alt.Chart(df_long).mark_circle(size=60).encode(
    x=alt.X('Year:N', title='Year'),
    y=alt.Y('Count:Q', title='Photo Count'),
    color=alt.Color('Month:N', title='Month', scale=alt.Scale(domain=list(month_colors.keys()), range=list(month_colors.values()))),
    tooltip=['Month', 'Year', 'Count']
).add_params(
    month_selection
).transform_filter(
    month_selection
).properties(
    title='Photo Counts by Month and Year',
    width=800,
    height=400
)

scatter_chart.show()
scatter_chart.save("photo_count_sc.html")
