# Tourist Data Analysis

This notebook analyzes tourist data from the Excel file 'Свод_Портрет_туриста_лето_зима_с_итогами.xlsx'.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot style: matplotlib 3.6 renamed the bundled seaborn style sheets to
# 'seaborn-v0_8*' and later releases dropped the old 'seaborn' alias, so pick
# whichever name this matplotlib installation actually ships.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)

# Load the data
df = pd.read_excel('Свод_Портрет_туриста_лето_зима_с_итогами.xlsx')

# Display basic information about the dataset (columns, dtypes, non-null counts).
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() -- that would append a stray "None" line to the output.
df.info()

# Display the first few rows of the dataset
display(df.head())

## Data Cleaning and Preprocessing

In [None]:
# Report how many values are missing in each column
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Drop rows in which every value is NaN, then renumber the index from 0
df = df.dropna(how='all').reset_index(drop=True)

# Preview the frame after cleaning
display(df.head())

## Exploratory Data Analysis

In [None]:
# Sum each age-bracket column over all rows and reshape the result into a
# two-column frame ('Age Group', 'Count') for plotting
age_columns = ['18-25', '26-35', '36-45', '46-55', 'старше 55 лет']
age_data = (
    df[age_columns]
    .sum()
    .rename_axis('Age Group')
    .reset_index(name='Count')
)

# Bar chart of the per-bracket totals
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=age_data, x='Age Group', y='Count', ax=ax)
ax.set_title('Age Distribution of Tourists')
ax.set_xlabel('Age Group')
ax.set_ylabel('Count')
plt.setp(ax.get_xticklabels(), rotation=45)  # tilt labels so they do not overlap
plt.show()

In [None]:
# Sum the two gender columns over all rows
gender_data = df[['Мужской', 'Женский']].sum()

# Pie chart with each slice labelled by its percentage share
fig, ax = plt.subplots(figsize=(8, 8))
gender_data.plot.pie(ax=ax, autopct='%1.1f%%')
ax.set_title('Gender Distribution of Tourists')
ax.set_ylabel('')  # blank out the y-axis label
plt.show()

In [None]:
# Every column from position 7 onward is treated as a country of origin.
# NOTE(review): this is a positional assumption -- confirm it against the
# actual sheet layout before trusting the chart.
country_columns = df.columns[7:]

# Sum each country column over all rows and keep the ten largest totals
country_totals = df[country_columns].sum()
country_data = country_totals.sort_values(ascending=False).head(10)

# Bar chart of the ten largest totals
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=country_data.index, y=country_data.values, ax=ax)
ax.set_title('Top 10 Countries of Origin')
ax.set_xlabel('Country')
ax.set_ylabel('Count')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

## Statistical Analysis

In [None]:
# Perform basic statistical analysis on numerical columns.
# Numeric dtypes are selected with the string alias 'number', which pandas
# treats the same as np.number; numpy is not imported in this notebook, so
# referring to np here would raise NameError on a fresh kernel.
numerical_columns = df.select_dtypes(include='number').columns
display(df[numerical_columns].describe())

## Conclusion

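This notebook provides a basic analysis of the tourist data: the age and gender distributions, the top 10 countries of origin, and summary statistics for the numeric columns. The column names and positions used above are assumptions, so adjust them to the actual structure of your Excel file, and add more specific analyses based on your needs.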