<a href="https://colab.research.google.com/github/gagandeep02/Data-Visualisation/blob/main/Think_like_a_desiner_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.express as px

# ==========================
# LOAD DATA
# ==========================
# Relative path: pandas resolves it against the current working directory.
file_path = "searched_with_top-queries_DE_20260223-1911_20260224-1911.csv"
df = pd.read_csv(file_path)

# Quick look at the first rows to confirm the columns loaded as expected.
print(df.head())

# ==========================
# CLEAN DATA
# ==========================

# "increase percent" arrives as text such as "20%" or "-2%"; turn it into floats.
percent_text = df["increase percent"].astype(str)
percent_text = percent_text.str.replace("%", "", regex=False)  # literal "%" only
percent_text = percent_text.replace("-", "0")  # whole-cell "-" placeholder becomes 0
df["increase percent"] = percent_text.astype(float)

# Rank the queries from highest to lowest search interest.
df = df.sort_values(by="search interest", ascending=False)

# ==========================
# BAR CHART — SEARCH INTEREST
# ==========================
interest_layout = {
    "xaxis_title": "Search Query",
    "yaxis_title": "Search Interest",
    "xaxis_tickangle": -45,  # slant the query labels
}

fig = px.bar(
    df,
    x="query",
    y="search interest",
    text="search interest",  # print the value on each bar
    title="Top Google Trends Queries (Germany)",
)
fig.update_layout(**interest_layout)
fig.show()

# ==========================
# BAR CHART — TREND GROWTH
# ==========================
# Separate ordering for this chart only; `df` keeps its search-interest order.
growth_ranked = df.sort_values(by="increase percent", ascending=False)

growth_layout = {
    "xaxis_title": "Query",
    "yaxis_title": "Increase %",
    "xaxis_tickangle": -45,
}

fig_growth = px.bar(
    growth_ranked,
    x="query",
    y="increase percent",
    text="increase percent",
    title="Search Growth (%)",
)
fig_growth.update_layout(**growth_layout)
fig_growth.show()

                  query  search interest increase percent
0             lululemon              100              -2%
1   lululemon athletica               19              20%
2          lululemon de               18              20%
3  lululemon düsseldorf               17              10%
4      lululemon berlin               10              20%


In [3]:
import pandas as pd
import plotly.express as px

# ==========================
# LOAD DATA
# ==========================
# Relative path: pandas resolves it against the current working directory.
file_path = "searched_with_top-queries_DE_20260223-1920_20260224-1920.csv"
df = pd.read_csv(file_path)

# ==========================
# CLEAN DATA
# ==========================
# "increase percent" is text such as "20%"; strip the sign, map the bare "-"
# placeholder to 0, and convert to float.
df["increase percent"] = (
    df["increase percent"]
    .astype(str)
    .str.replace("%", "", regex=False)
    .replace("-", "0")
    .astype(float)
)

# lowercase copy of the query so brand matching is case-insensitive
df["query_lower"] = df["query"].str.lower()

# ==========================
# FILTER BRANDS
# ==========================
# na=False    -> rows with a missing query count as "no match" instead of
#                putting NaN into the boolean mask.
# regex=False -> the brand name is matched as a literal substring.
is_lululemon = df["query_lower"].str.contains("lululemon", regex=False, na=False)
is_adidas = df["query_lower"].str.contains("adidas", regex=False, na=False)

# .assign() returns a new DataFrame, so the brand label is never written into
# a slice of `df` (the original column assignment raised SettingWithCopyWarning).
lululemon_df = df[is_lululemon].assign(brand="Lululemon")
adidas_df = df[is_adidas].assign(brand="Adidas")

# NOTE: a brand with zero matching queries contributes no rows here and is
# therefore absent from the summary and from every chart below.
combined = pd.concat([lululemon_df, adidas_df])

# ==========================
# TOTAL SEARCH INTEREST
# ==========================
# One row per brand: summed search interest and mean percentage increase.
brand_summary = combined.groupby("brand").agg({
    "search interest": "sum",
    "increase percent": "mean"
}).reset_index()

print(brand_summary)

# ==========================
# BAR: Total Interest Comparison
# ==========================
fig = px.bar(
    brand_summary,
    x="brand",
    y="search interest",
    text="search interest",
    title="Lululemon vs Adidas — Total Search Interest (Germany)",
)

fig.show()

# ==========================
# BAR: Growth Comparison
# ==========================
fig_growth = px.bar(
    brand_summary,
    x="brand",
    y="increase percent",
    text="increase percent",
    title="Search Growth Comparison (%)",
)

fig_growth.show()

# ==========================
# DETAILED QUERY COMPARISON
# ==========================
# Every matched query as its own bar, coloured by brand, highest interest first.
fig_detail = px.bar(
    combined.sort_values("search interest", ascending=False),
    x="query",
    y="search interest",
    color="brand",
    title="Top Queries: Lululemon vs Adidas",
)

fig_detail.update_layout(xaxis_tickangle=-45)
fig_detail.show()

    brand  search interest  increase percent
0  Adidas              192         24.897436




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [4]:
import pandas as pd
import plotly.express as px

# ==============================
# MANUAL FINANCIAL DATA (USD)
# ==============================
# Hand-entered approximate figures, in billions of USD.
data = {
    "Brand": ["Lululemon", "Adidas"],
    "Revenue_Billion_USD": [9.6, 23.7],   # approx 2024 revenue
    "Net_Profit_Billion_USD": [1.5, 0.3]  # approx net income
}

df = pd.DataFrame(data)

# ==============================
# CALCULATE PROFIT MARGIN
# ==============================
# Margin = net profit as a percentage of revenue, rounded to two decimals.
profit_share = df["Net_Profit_Billion_USD"].div(df["Revenue_Billion_USD"])
df["Profit_Margin_%"] = profit_share.mul(100).round(2)

print(df)


def _brand_bar(column, title):
    """Return a one-bar-per-brand chart of `column`, labelled with its values."""
    return px.bar(df, x="Brand", y=column, text=column, title=title)


# ==============================
# BAR CHART — REVENUE
# ==============================
fig_rev = _brand_bar("Revenue_Billion_USD", "Revenue Comparison (Billion USD)")
fig_rev.show()

# ==============================
# BAR CHART — PROFIT
# ==============================
fig_profit = _brand_bar("Net_Profit_Billion_USD", "Net Profit Comparison (Billion USD)")
fig_profit.show()

# ==============================
# BAR CHART — PROFIT MARGIN
# ==============================
fig_margin = _brand_bar("Profit_Margin_%", "Profit Margin Comparison (%)")
fig_margin.show()

# ==============================
# COMBINED VISUAL (Nice One)
# ==============================
# Long format: one row per (brand, metric), so the two metrics can be drawn
# as side-by-side bars for each brand.
money_columns = ["Revenue_Billion_USD", "Net_Profit_Billion_USD"]
long_form = df.melt(id_vars="Brand", value_vars=money_columns)

fig_combined = px.bar(
    long_form,
    x="Brand",
    y="value",
    color="variable",
    barmode="group",
    title="Revenue vs Profit Comparison",
)
fig_combined.show()

       Brand  Revenue_Billion_USD  Net_Profit_Billion_USD  Profit_Margin_%
0  Lululemon                  9.6                     1.5            15.62
1     Adidas                 23.7                     0.3             1.27


In [5]:
import pandas as pd
import plotly.express as px

# ====================================
# SAMPLE EUROPE REVENUE DATA (Billion USD)
# (Example distribution for demo purposes)
# ====================================

# Illustrative numbers only; the three-letter codes are what the maps use to
# locate each country.
data = {
    "Country": [
        "Germany", "France", "United Kingdom",
        "Italy", "Spain", "Netherlands",
        "Sweden", "Poland", "Switzerland"
    ],
    "ISO_Code": [
        "DEU", "FRA", "GBR",
        "ITA", "ESP", "NLD",
        "SWE", "POL", "CHE"
    ],
    "Adidas_Revenue": [5.5, 3.2, 4.0, 2.1, 1.8, 1.2, 0.9, 1.1, 0.8],
    "Lululemon_Revenue": [0.6, 0.4, 0.7, 0.3, 0.2, 0.25, 0.15, 0.2, 0.18]
}

df = pd.DataFrame(data)


def _europe_map(value_column, title):
    """Return a Europe-scoped choropleth of `df` coloured by `value_column`."""
    return px.choropleth(
        df,
        locations="ISO_Code",
        color=value_column,
        hover_name="Country",
        scope="europe",
        title=title,
    )


# ====================================
# MAP 1 — ADIDAS IN EUROPE
# ====================================
fig_adidas = _europe_map(
    "Adidas_Revenue",
    "Adidas Revenue Distribution in Europe (Billion USD)",
)
fig_adidas.show()


# ====================================
# MAP 2 — LULULEMON IN EUROPE
# ====================================
fig_lulu = _europe_map(
    "Lululemon_Revenue",
    "Lululemon Revenue Distribution in Europe (Billion USD)",
)
fig_lulu.show()


# ====================================
# MAP 3 — REVENUE DIFFERENCE
# ====================================
# Positive values mean Adidas is ahead of Lululemon in that country.
df["Revenue_Difference"] = df["Adidas_Revenue"].sub(df["Lululemon_Revenue"])

fig_diff = _europe_map(
    "Revenue_Difference",
    "Revenue Difference (Adidas - Lululemon)",
)
fig_diff.show()

In [1]:
import pandas as pd
import plotly.express as px

# ======================================
# GLOBAL REVENUE DATA (Approx Distribution)
# Values in Billion USD
# ======================================
data = {
    "Country": [
        "United States", "Canada", "Germany", "United Kingdom",
        "France", "China", "Japan", "Australia", "India", "Brazil"
    ],
    # Three-letter country codes used by the map to locate each row.
    "ISO": ["USA", "CAN", "DEU", "GBR", "FRA", "CHN", "JPN", "AUS", "IND", "BRA"],

    # Approx global revenue distribution
    "Adidas_Revenue": [6.5, 1.2, 5.5, 4.0, 3.2, 4.5, 2.0, 1.1, 1.0, 0.9],
    "Lululemon_Revenue": [6.0, 1.5, 0.6, 0.7, 0.4, 1.2, 0.8, 0.9, 0.5, 0.4],
}

df = pd.DataFrame(data)

# ======================================
# FIND HIGHEST SALES PER COUNTRY
# ======================================
# Vectorized column comparison instead of a row-by-row apply().
# Adidas wins only when strictly greater; ties fall to "Lululemon".
adidas_leads = df["Adidas_Revenue"] > df["Lululemon_Revenue"]
df["Top_Brand"] = "Lululemon"
df.loc[adidas_leads, "Top_Brand"] = "Adidas"

# The larger of the two brand revenues in each country; this drives the map colour.
df["Highest_Revenue"] = df[["Adidas_Revenue", "Lululemon_Revenue"]].max(axis=1)

print(df)

# ======================================
# WORLD MAP — HIGHEST SALES
# ======================================
fig = px.choropleth(
    df,
    locations="ISO",
    color="Highest_Revenue",
    hover_name="Country",
    hover_data=["Top_Brand"],  # show the leading brand in the tooltip
    title="World Map — Highest Sales by Country (Adidas vs Lululemon)",
    color_continuous_scale="Viridis",
)

fig.show()

          Country  ISO  Adidas_Revenue  Lululemon_Revenue  Top_Brand  \
0   United States  USA             6.5                6.0     Adidas   
1          Canada  CAN             1.2                1.5  Lululemon   
2         Germany  DEU             5.5                0.6     Adidas   
3  United Kingdom  GBR             4.0                0.7     Adidas   
4          France  FRA             3.2                0.4     Adidas   
5           China  CHN             4.5                1.2     Adidas   
6           Japan  JPN             2.0                0.8     Adidas   
7       Australia  AUS             1.1                0.9     Adidas   
8           India  IND             1.0                0.5     Adidas   
9          Brazil  BRA             0.9                0.4     Adidas   

   Highest_Revenue  
0              6.5  
1              1.5  
2              5.5  
3              4.0  
4              3.2  
5              4.5  
6              2.0  
7              1.1  
8              1.0

In [3]:
import pandas as pd
import plotly.express as px

# ======================================
# GLOBAL REVENUE DATA (Billion USD)
# ======================================
data = {
    "Country": [
        "United States", "Canada", "Germany", "United Kingdom",
        "France", "China", "Japan", "Australia", "India", "Brazil"
    ],
    # Three-letter country codes used by the map to locate each row.
    "ISO": ["USA", "CAN", "DEU", "GBR", "FRA", "CHN", "JPN", "AUS", "IND", "BRA"],

    "Adidas_Revenue": [6.5, 1.2, 5.5, 4.0, 3.2, 4.5, 2.0, 1.1, 1.0, 0.9],
    "Lululemon_Revenue": [6.0, 1.5, 0.6, 0.7, 0.4, 1.2, 0.8, 0.9, 0.5, 0.4],
}

df = pd.DataFrame(data)

# ======================================
# ADD WINNER + MAX SALES
# ======================================
# Vectorized column comparison instead of a row-by-row apply().
# Adidas wins only when strictly greater; ties fall to "Lululemon".
adidas_leads = df["Adidas_Revenue"] > df["Lululemon_Revenue"]
df["Top_Brand"] = "Lululemon"
df.loc[adidas_leads, "Top_Brand"] = "Adidas"

# The larger of the two brand revenues in each country; this drives the map colour.
df["Highest_Revenue"] = df[["Adidas_Revenue", "Lululemon_Revenue"]].max(axis=1)

# ======================================
# WORLD MAP WITH DETAILED HOVER
# ======================================
fig = px.choropleth(
    df,
    locations="ISO",
    color="Highest_Revenue",
    hover_name="Country",
    # True adds a column to the tooltip; False hides one that plotly express
    # would otherwise show because it is used for locations / colour.
    hover_data={
        "Adidas_Revenue": True,
        "Lululemon_Revenue": True,
        "Top_Brand": True,
        "ISO": False,
        "Highest_Revenue": False
    },
    color_continuous_scale="Viridis",
    title="Global Sales Comparison — Adidas vs Lululemon"
)

fig.show()

In [4]:
import pandas as pd
import plotly.express as px

# ======================================
# GLOBAL REVENUE DATA (Billion USD)
# ======================================
data = {
    "Country": [
        "United States", "Canada", "Germany", "United Kingdom",
        "France", "China", "Japan", "Australia", "India", "Brazil"
    ],
    # Three-letter country codes used by the map to locate each row.
    "ISO": ["USA", "CAN", "DEU", "GBR", "FRA", "CHN", "JPN", "AUS", "IND", "BRA"],

    "Adidas_Revenue_Billion_USD": [6.5, 1.2, 5.5, 4.0, 3.2, 4.5, 2.0, 1.1, 1.0, 0.9],
    "Lululemon_Revenue_Billion_USD": [6.0, 1.5, 0.6, 0.7, 0.4, 1.2, 0.8, 0.9, 0.5, 0.4],
}

df = pd.DataFrame(data)

# ======================================
# Determine Winner + Highest Revenue
# ======================================
# Vectorized column comparison instead of a row-by-row apply().
# Adidas wins only when strictly greater; ties fall to "Lululemon".
adidas_leads = (
    df["Adidas_Revenue_Billion_USD"] > df["Lululemon_Revenue_Billion_USD"]
)
df["Top_Brand"] = "Lululemon"
df.loc[adidas_leads, "Top_Brand"] = "Adidas"

# The larger of the two brand revenues in each country; this drives the map colour.
df["Highest_Revenue"] = df[
    ["Adidas_Revenue_Billion_USD", "Lululemon_Revenue_Billion_USD"]
].max(axis=1)

# ======================================
# WORLD MAP
# ======================================
# custom_data lets plotly express attach these columns to the trace itself,
# so each location's tooltip values always come from the same row as its
# colour. (Assigning a whole DataFrame via update_traces afterwards would
# write the same full array onto every trace.)
fig = px.choropleth(
    df,
    locations="ISO",
    color="Highest_Revenue",
    hover_name="Country",
    custom_data=[
        "Adidas_Revenue_Billion_USD",     # customdata[0]
        "Lululemon_Revenue_Billion_USD",  # customdata[1]
        "Top_Brand",                      # customdata[2]
    ],
    color_continuous_scale="Viridis",
    title="Global Sales Comparison (Billion USD)",
)

# ======================================
# CUSTOM HOVER FORMAT
# ======================================
# %{hovertext} is the hover_name column (Country); <extra></extra> removes
# the secondary box plotly would otherwise draw next to the tooltip.
fig.update_traces(
    hovertemplate=(
        "<b>%{hovertext}</b><br><br>"
        "Adidas: $%{customdata[0]:.2f}B<br>"
        "Lululemon: $%{customdata[1]:.2f}B<br>"
        "Winner: %{customdata[2]}<extra></extra>"
    )
)

# ======================================
# COLOR BAR TITLE (Legend)
# ======================================
fig.update_layout(
    coloraxis_colorbar=dict(
        title="Highest Revenue<br>(Billion USD)"
    )
)

fig.show()