In [1]:
import pandas as pd

# Load the CSV file with encoding specified
file_path = '/content/IL_data.csv'  # Update this if your file path is different

# Try UTF-8 first and fall back to ISO-8859-1 only when the file is not
# valid UTF-8.
try:
    data = pd.read_csv(file_path, encoding='utf-8')  # Default encoding
except UnicodeDecodeError:
    data = pd.read_csv(file_path, encoding='ISO-8859-1')  # Fallback encoding

# List of columns to analyze
columns_of_interest = [
    'iyear', 'imonth', 'iday', 'country', 'city',
    'latitude', 'longitude', 'nperps', 'nkill', 'nwound',
    'location', 'success', 'attacktype1', 'suicide',
    'targtype1', 'weaptype1_txt', 'gname', 'extended'
]

# Coordinates are legitimately negative (southern / western hemisphere), so
# they must not take part in the negative-value cleanup below.
signed_columns = {'latitude', 'longitude'}

# Keep only the columns under analysis; copy so the cleanup never touches `data`.
filtered_data = data[columns_of_interest].copy()

# Turn negative values in the remaining numeric columns into missing values
# BEFORE dropping incomplete rows. Doing it after dropna() (as before) left
# those rows in the frame with NaN instead of removing them.
# NOTE(review): negatives are presumably "unknown" sentinels in this dataset;
# confirm against the codebook.
for column in columns_of_interest:
    if column in signed_columns:
        continue
    if pd.api.types.is_numeric_dtype(filtered_data[column]):
        filtered_data[column] = filtered_data[column].mask(filtered_data[column] < 0)

# Remove every row that has a missing value (original or produced above)
filtered_data = filtered_data.dropna()

# Filter for rows where the country is Israel.
# NOTE(review): the recorded cell output shows 'country' holding the numeric
# code 97, so the old comparison against the string 'Israel' matched nothing.
# 97 is assumed to be the code for Israel; confirm against the codebook.
israel_country_code = 97
country = filtered_data['country']
if pd.api.types.is_numeric_dtype(country):
    israel_mask = country == israel_country_code
else:
    israel_mask = country == 'Israel'
filtered_data_israel = filtered_data[israel_mask]

# Function to display column information
def display_column_info(data):
    """
    Print a summary of every column in a DataFrame.

    For each column the following is written to stdout:
    - Minimum and maximum of the values that can be parsed as numbers
      (or a notice when none can)
    - Count of unique non-missing values
    - Up to 40 unique values, with the number actually shown

    Args:
        data: The pandas DataFrame to summarise.

    Returns:
        None. All results are printed.
    """
    max_shown = 40

    for column in data.columns:
        series = data[column]
        print(f"Column: {column}")

        # Only the conversion can fail; keep the try body to that one call.
        try:
            # Values that cannot be parsed become NaN instead of raising
            numeric_col = pd.to_numeric(series, errors='coerce')
        except (TypeError, ValueError) as e:
            # Raised for values pandas cannot handle at all (e.g. nested objects)
            print(f"Error processing column {column}: {e}")
        else:
            # Check for numeric data and print min/max if applicable
            if numeric_col.notna().any():
                print(f"Min Value: {numeric_col.min()}")
                print(f"Max Value: {numeric_col.max()}")
            else:
                print("No valid numeric data in this column after conversion.")

        # Count unique values in the column (nunique() ignores missing values)
        unique_count = series.nunique()
        print(f"Unique Values Count: {unique_count}")

        # unique() keeps a missing value as its own entry, so the label must be
        # derived from what is actually printed, not from nunique().
        shown_values = series.unique()[:max_shown]
        print(f"Unique Values ({len(shown_values)} shown):")
        print(shown_values)

        print("-" * 40)

# Print the per-column statistics and unique values of the filtered dataset
display_column_info(data=filtered_data)


Column: iyear
Min Value: 1972.0
Max Value: 2017.0
Unique Values Count: 22
Unique Values (22 shown):
[1972. 1995. 1997. 1998. 1999. 2000. 2001. 2002. 2003. 2004. 2006. 2007.
 2008. 2009. 2010. 2011. 2012. 2013. 2014. 2015. 2016. 2017.]
----------------------------------------
Column: imonth
Min Value: 1.0
Max Value: 12.0
Unique Values Count: 12
Unique Values (12 shown):
[ 9. 11.  3. 10.  8.  1.  2.  4.  7. 12.  6.  5.]
----------------------------------------
Column: iday
Min Value: 1.0
Max Value: 31.0
Unique Values Count: 31
Unique Values (31 shown):
[20. 22.  4. 21. 26. 10. 27. 29. 25. 14. 28.  9.  3.  7. 30.  2. 16. 18.
 12. 19. 15.  8. 17. 11.  5. 24. 31.  1. 13.  6. 23.]
----------------------------------------
Column: country
Min Value: 97.0
Max Value: 97.0
Unique Values Count: 1
Unique Values (1 shown):
[97.]
----------------------------------------
Column: city
No valid numeric data in this column after conversion.
Unique Values Count: 55
Unique Values (40 shown):
['Jerusalem' '