## Exploratory Data Analysis (EDA) and Data Visualization
### Basic EDA - basic EDA operations for a better understanding of the dataset. This is very useful for analysis and visualization.

In [None]:
import pandas as pd


In [None]:
# Load the quotes dataset from the CSV file in the working directory.
Df = pd.read_csv("quotes.csv")

# Preview the first five rows to see what the dataset looks like.
Df.head(n=5)

##### 1. How many null values are in the dataset?

In [None]:
# Count the missing (null) entries in each column.
# NOTE: the DataFrame is loaded under the name `Df` (capital D); the
# lowercase `df` used here previously is not that name and raised NameError.
missing_values = Df.isnull().sum()
print(f"\nMissing Values:\n{missing_values}")

##### 2. Total number of unique authors

In [None]:
# Count how many distinct values appear in the 'author' column.
author_column = Df["author"]
unique_authors = author_column.nunique()
print(f"Unique Authors: {unique_authors}")

##### 3. Total number of unique tags

In [None]:
# Build one flat Series of individual tags: drop rows without tags, split
# each string on commas, put every tag on its own row, and trim whitespace.
tag_lists = Df["tag_name"].dropna().str.split(",")
all_tags = tag_lists.explode().str.strip()

# Number of distinct tags across the whole dataset.
unique_tags = all_tags.nunique()
print(f"\nTotal Unique Tags: {unique_tags}")

##### 5. Average length of quotes

In [None]:
# Character count of every quote, then the mean of those counts.
quote_lengths = Df["quote"].str.len()
avg_quote_length = quote_lengths.mean()
print(f"Average Quote Length: {avg_quote_length:.2f} characters")


##### 6. Display descriptive statistics

In [None]:
# Summarise every column (include='all' covers non-numeric columns too).
summary_stats = Df.describe(include="all")
print("\nDescriptive Statistics:\n", summary_stats)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import re

# Configure seaborn to use its "whitegrid" style for the plots below.
sns.set(style="whitegrid")

## Visualization

### 1. Bar Chart - Top 10 Most Quoted Authors

In [None]:
# Count the quotes per author and keep the first ten entries of the
# resulting counts, i.e. the ten most quoted authors.
top_authors = Df["author"].value_counts().head(10)
author_names = top_authors.index
quote_counts = top_authors.values

# Horizontal bar plot: quote counts on the x-axis, author names on the y-axis.
plt.figure(figsize=(12, 6))
sns.barplot(x=quote_counts, y=author_names, palette="viridis")
plt.title("Top 10 Most Quoted Authors")
plt.xlabel("Number of Quotes")
plt.ylabel("Author")
plt.show()


### 2. Word Cloud - Most Common Words in Quotes

In [None]:
# Combine all quotes into one large string. Rows with a missing quote are
# dropped and the remaining values are cast to str, because str.join raises
# TypeError as soon as it meets a non-string item such as NaN (a float).
quote_texts = Df['quote'].dropna().astype(str)
all_text = " ".join(quote_texts)

# Clean text: strip every character that is neither a word character nor
# whitespace, then convert to lowercase.
all_text = re.sub(r"[^\w\s]", "", all_text).lower()

# Generate the word cloud image from the cleaned text.
wordcloud = WordCloud(width=800, height=400, background_color="white", colormap="viridis").generate(all_text)

# Render the word cloud as an image; the axes are hidden because they carry
# no meaning for this kind of picture.
plt.figure(figsize=(12, 6))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.title("Most Common Words in Quotes")
plt.show()

### 3. Pie Chart - Proportion of Top 5 Tags

In [2]:
# Split the tag strings on commas into one tag per row, trim whitespace, and
# count occurrences.
# NOTE: the DataFrame is loaded under the name `Df` (capital D); the
# lowercase `df` used here previously is not that name and raised NameError.
all_tags = Df['tag_name'].dropna().str.split(',').explode().str.strip()
top_tags = all_tags.value_counts().head(5)  # Keep the five most frequent tags

# Pie chart showing each top tag's share of the top-five total.
plt.figure(figsize=(8, 8))
plt.pie(top_tags, labels=top_tags.index, autopct='%1.1f%%', colors=sns.color_palette("viridis", 5))
plt.title("Proportion of Top 5 Tags")
plt.show()

## Purpose & Insights:
#### 1. Identify which authors have the most quotes in the dataset.
#### 2. Useful for content analysis, literary studies, or recommendation systems.
#### 3. Helps in understanding the most influential or frequently cited authors.