# Projekt - report to pdf

# Import knihoven

In [1]:
import psycopg2
import openpyxl as opxl

import pandas as pd
import plotly.express as px

from pprint import pprint

# Import datasetů
- https://policie.gov.cz/clanek/statistika-nehodovosti.aspx
- Data z let 2023 - 2025
- Převedla jsem na formát xlsx
- Importováno do PostgreSQL (PostgreSQL database_dopravní nehody ČR)
- upraveny datumy a čas
- vytvořeny náhledy pro využití v další analýze

# Připojení k databázi PostgreSQL

In [2]:
def execute_sql(sql_query: str, params=None) -> pd.DataFrame:
    """Run a SQL query against PostgreSQL and return the result as a DataFrame.

    Connection settings come from the standard libpq environment variables so
    that no credentials are stored in the notebook:

    * ``PGHOST``     (default ``'localhost'``)
    * ``PGUSER``     (default ``'postgres'``)
    * ``PGDATABASE`` (default ``'postgres'``)
    * ``PGPASSWORD`` (no default; when unset, psycopg2 drops the ``None``
      argument and libpq falls back to its own lookup, e.g. ``~/.pgpass``)

    Parameters
    ----------
    sql_query : str
        The SQL statement to execute. It must produce a result set.
    params : sequence or mapping, optional
        Values bound to ``%s`` / ``%(name)s`` placeholders in ``sql_query``.
        Use this instead of building SQL strings by hand.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record, with columns named after the cursor
        description.

    Raises
    ------
    psycopg2.Error
        If connecting or executing the query fails. The original error is
        propagated instead of being replaced by a generic message.
    """
    import os  # local import keeps this cell self-contained

    connection = psycopg2.connect(
        host=os.environ.get('PGHOST', 'localhost'),
        user=os.environ.get('PGUSER', 'postgres'),
        password=os.environ.get('PGPASSWORD'),
        dbname=os.environ.get('PGDATABASE', 'postgres'),
    )
    try:
        # The cursor context manager closes the cursor when the block exits.
        with connection.cursor() as cursor:
            cursor.execute(sql_query, params)
            rows = cursor.fetchall()
            colnames = [column[0] for column in cursor.description]
    finally:
        # Always release the connection, whether or not the query succeeded.
        connection.close()

    return pd.DataFrame(rows, columns=colnames)

# Úprava a čištění dat

In [4]:
def _select_all(view_name):
    """Fetch every row of one view in the dopravni_nehody_cr schema."""
    return execute_sql("SELECT * FROM dopravni_nehody_cr." + view_name)


# One DataFrame per prepared database view.
df_animal_involvement = _select_all("animal_involvement")
df_accidents_in_time = _select_all("accidents_in_time")
df_drivers = _select_all("drivers")
df_first_aid = _select_all("first_aid")
df_pedestrian_involvement = _select_all("pedestrian_involvement")
df_accidents_crash = _select_all("accidents_crash")

# Lookup tables that describe the coded columns.
df_column_names = _select_all("column_names")
df_data_description = _select_all("data_description")

In [100]:
# Helper for checking missing values
def null_values(table, column_names=None):
    """Summarise missing values per column of ``table``.

    Parameters
    ----------
    table : pandas.DataFrame
        The frame to inspect.
    column_names : pandas.DataFrame, optional
        Lookup frame with at least the columns ``code``, ``descr`` and
        ``name_column_en``. Defaults to the notebook-level ``df_column_names``.

    Returns
    -------
    pandas.DataFrame
        Columns ``column``, ``descr``, ``name_column_en``, ``na_count`` and
        ``na%``: one row per column of ``table`` followed by a final ``TOTAL``
        row whose ``na_count`` holds the number of rows in ``table`` and whose
        ``na%`` is a blank string. Columns without a match in the lookup get
        NaN descriptions.
    """
    if column_names is None:
        # Fall back to the lookup table loaded earlier in the notebook.
        column_names = df_column_names

    row_total = len(table)
    counts = table.isna().sum()
    summary = pd.DataFrame({
        'column': counts.index,
        'na_count': counts.values,
        'na%': (counts.values / row_total * 100).round(2)
    })

    # Join only the lookup columns that are reported, so extra or missing
    # auxiliary columns (e.g. table_name) cannot break the summary.
    summary = summary.merge(column_names[['code', 'descr', 'name_column_en']],
                            left_on='column',
                            right_on='code',
                            how='left')
    summary = summary[['column', 'descr', 'name_column_en', 'na_count', 'na%']]

    total_row = pd.DataFrame({
        'column': ['TOTAL'],
        'descr': ['Celkový počet záznamů'],
        'name_column_en': ['Total row count'],
        'na_count': [row_total],
        'na%': [' ']
    })
    return pd.concat([summary, total_row], ignore_index=True)

def get_table_column_names(table, column_names=None):
    """List the columns of ``table`` together with their lookup metadata.

    Parameters
    ----------
    table : pandas.DataFrame
        The frame whose columns should be described.
    column_names : pandas.DataFrame, optional
        Lookup frame keyed by a ``code`` column. Defaults to the
        notebook-level ``df_column_names``.

    Returns
    -------
    pandas.DataFrame
        A ``column`` column with the names from ``table``, followed by every
        lookup column except ``code``. Columns without a match in the lookup
        get NaN metadata.
    """
    if column_names is None:
        # Fall back to the lookup table loaded earlier in the notebook.
        column_names = df_column_names

    columns = pd.DataFrame({'column': table.columns})
    described = columns.merge(column_names,
                              left_on='column',
                              right_on='code',
                              how='left')
    # 'code' duplicates 'column' after the join, so it is not returned.
    return described.drop(columns=['code'])


In [101]:
# Display the per-column missing-value summary for the accidents_in_time view.
null_values(df_accidents_in_time)

Unnamed: 0,column,descr,name_column_en,na_count,na%
0,p1,identifikační_číslo_nehody,ID_accident,0,0.0
1,id_vozidla,,,0,0.0
2,p2a,den_měsíc_rok,day_month_year,0,0.0
3,p2b,čas,time,74377,14.32
4,p5a,lokalita_nehody,locality,0,0.0
5,p6,druh_nehody,accident_type,0,0.0
6,p9,charakter_nehody,accident_characteristic,0,0.0
7,p13a,usmrceno_osob,fatalities,0,0.0
8,p13b,těžce_zraněno_osob,serious_injuries,0,0.0
9,p13c,lehce_zraněno_osob,light_injuries,0,0.0


In [8]:
# Parse the day.month.year strings in p2a into datetimes, then print the schema.
parsed_dates = pd.to_datetime(df_accidents_in_time['p2a'], format='%d.%m.%Y')
df_accidents_in_time['p2a'] = parsed_dates
df_accidents_in_time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 519272 entries, 0 to 519271
Data columns (total 17 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   p1          519272 non-null  object        
 1   id_vozidla  519272 non-null  object        
 2   p2a         519272 non-null  datetime64[ns]
 3   p2b         444895 non-null  object        
 4   p5a         519272 non-null  object        
 5   p6          519272 non-null  object        
 6   p9          519272 non-null  object        
 7   p13a        519272 non-null  int64         
 8   p13b        519272 non-null  int64         
 9   p13c        519272 non-null  int64         
 10  p14         519268 non-null  float64       
 11  p29         12443 non-null   object        
 12  p34         519272 non-null  int64         
 13  p36         519272 non-null  object        
 14  p44         519272 non-null  object        
 15  p59a        396065 non-null  object        
 16  p5

In [19]:
# Derive Year and Month columns from the parsed date column p2a.
date_parts = df_accidents_in_time['p2a'].dt
df_accidents_in_time['Year'] = date_parts.year
df_accidents_in_time['Month'] = date_parts.month

In [20]:
# Print the schema again now that the Year and Month columns have been added.
df_accidents_in_time.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 519272 entries, 0 to 519271
Data columns (total 19 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   p1          519272 non-null  object        
 1   id_vozidla  519272 non-null  object        
 2   p2a         519272 non-null  datetime64[ns]
 3   p2b         444895 non-null  object        
 4   p5a         519272 non-null  object        
 5   p6          519272 non-null  object        
 6   p9          519272 non-null  object        
 7   p13a        519272 non-null  int64         
 8   p13b        519272 non-null  int64         
 9   p13c        519272 non-null  int64         
 10  p14         519268 non-null  float64       
 11  p29         12443 non-null   object        
 12  p34         519272 non-null  int64         
 13  p36         519272 non-null  object        
 14  p44         519272 non-null  object        
 15  p59a        396065 non-null  object        
 16  p5

In [22]:
# Number of accidents per year.
# p1 is the accident ID. The frame also carries id_vozidla, so one accident
# presumably spans several rows (one per vehicle) - verify against the view.
# Counting distinct p1 values per Year avoids counting such accidents twice.
accidents_per_year = (
    df_accidents_in_time
    .groupby('Year', as_index=False)['p1']
    .nunique()
    .rename(columns={'p1': 'accident_count'})
)

yearly = px.bar(accidents_per_year,
                x='Year',
                y='accident_count',
                title="Počet nehod v letech",
                labels={'Year': 'Rok', 'accident_count': 'Počet nehod'},
                template="plotly_white")

# Treat years as discrete categories so the axis shows no fractional ticks.
yearly.update_xaxes(type='category')

# NOTE: rendering in a notebook requires the nbformat package (>=4.2.0)
# to be installed in the kernel's environment.
yearly.show()


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed