# Chapter 2: Attribute data operations

### Prerequisites

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import rasterio 

In [None]:
# Root folders for the input data and for previously generated outputs.
# Both end with a path separator so file names can be appended directly.
data_path = "F:\\books\\geocompy-main\\data\\"
output_path = "F:\\books\\geocompy-main\\output\\"

# Vector layer used throughout section 2.2.
world = gpd.read_file(f"{data_path}world.gpkg")

# Raster dataset handles.
# NOTE(review): these handles are not used or closed in the visible part of
# the file; presumably later cells read from them. Confirm and close them
# once they are no longer needed.
src_elev = rasterio.open(f"{output_path}elev.tif")
src_grain = rasterio.open(f"{output_path}grain.tif")
src_multi_rast = rasterio.open(f"{data_path}landsat.tif")

# Non-spatial table that is joined to `world` on 'name_long' in 2.2.3.
coffee_data = pd.read_csv(f"{data_path}coffee_data.csv")

## 2.2 Vector attribute manipulation

### 2.2.1 Vector attribute subsetting


In [None]:
# Positional subsetting: the first three rows, every column.
world.iloc[:3, :]

In [None]:
# Positional subsetting: every row, the first three columns.
world.iloc[:, :3]

In [None]:
# Positional subsetting on both axes: first three rows of the first three columns.
world.iloc[:3, :3]

In [None]:
# Select columns by name with a list of labels; all rows are kept.
world[['name_long', 'geometry']]  # Display all rows of the 'name_long' and 'geometry' columns

In [None]:
# Label-based column slice: every row, columns from 'name_long' through 'pop'.
# Unlike positional slices, a `.loc` label slice includes both end points.
world.loc[:, 'name_long':'pop']

In [None]:
# Drop rows by index label; with no `axis`/`columns` argument, `drop` acts on rows.
# The result is a new object; `world` itself is not modified.
world.drop([2, 3, 5])

In [None]:
# Drop columns by name using the explicit `columns=` keyword.
world.drop(columns=['name_long', 'continent'])  # Drop the 'name_long' and 'continent' columns

In [None]:
# Same result as the `columns=` form: positional labels combined with `axis=1`.
world.drop(['name_long', 'continent'], axis=1)  # Drop the 'name_long' and 'continent' columns

In [None]:
# Explicit `index=` keyword: equivalent to passing the labels positionally with the default axis.
world.drop(index=[2, 3, 5])  # Drop rows with indices 2, 3, and 5 from the world GeoDataFrame

In [None]:
# Keep two attribute columns and give them friendlier names. `rename` returns
# a new object, so `world` keeps its original column names.
(
    world[['name_long', 'pop']]
    .rename(columns={'name_long': 'country', 'pop': 'population'})
)

In [None]:
# Boolean mask: True where the 'area_km2' value is below 10,000.
idx_small = world['area_km2'] < 10_000
# Boolean indexing keeps only the rows where the mask is True.
small_countries = world[idx_small]
small_countries

In [None]:
# Chain three subsetting steps: rows where continent is 'Asia', then two
# columns by name, then the first five rows by position.
(
    world[world['continent'] == 'Asia']
    .loc[:, ['name_long', 'continent']]
    .iloc[:5, :]
)

In [None]:
# Combine two conditions with the element-wise OR operator; each comparison
# needs its own parentheses because `|` binds tighter than `==`.
(
    world[
        (world['continent'] == 'North America')
        | (world['continent'] == 'South America')
    ]
    .loc[:, ['name_long', 'continent']]
)

### 2.2.2 Vector attribute aggregation

In [None]:
# Non-spatial aggregation: total population per continent. `reset_index`
# turns the group key back into an ordinary 'continent' column.
world_agg1 = (
    world
    .groupby('continent')[['pop']]
    .sum()
    .reset_index()
)
world_agg1

In [None]:
# Spatial aggregation: `dissolve` merges the geometries of each continent and
# sums the remaining attribute ('pop') within each group.
world_agg2 = (
    world[['continent', 'pop', 'geometry']]
    .dissolve(by='continent', aggfunc='sum')
    .reset_index()
)
world_agg2

In [None]:
# Map of the dissolved continents, coloured by summed population.
fig, ax = plt.subplots(figsize=(6, 3))
world_agg2.plot(
    ax=ax,
    column='pop',
    legend=True,
    edgecolor='black',
)
ax.set_title('Population by Continent')

In [None]:
# Dissolve by continent with a different aggregation per column:
# count of country names, summed population and summed area.
# The name count is then renamed to 'n'.
world_agg3 = (
    world
    .dissolve(
        by='continent',
        aggfunc={'name_long': 'count', 'pop': 'sum', 'area_km2': 'sum'},
    )
    .rename(columns={'name_long': 'n'})
    .reset_index()
)
world_agg3

In [None]:
# Continents coloured by summed population ('pop').
fig, ax = plt.subplots(figsize=(5, 2.5))
world_agg3.plot(ax=ax, column='pop', legend=True, edgecolor='black')

In [None]:
# Continents coloured by summed area ('area_km2').
fig, ax = plt.subplots(figsize=(5, 2.5))
world_agg3.plot(ax=ax, column='area_km2', legend=True, edgecolor='black')

In [None]:
# Continents coloured by the number of countries they contain ('n').
fig, ax = plt.subplots(figsize=(5, 2.5))
world_agg3.plot(ax=ax, column='n', legend=True, edgecolor='black')

In [None]:
# Build a non-spatial summary from the dissolved continents: add population
# density, order by number of countries, and keep the top three rows.
world_agg4 = world_agg3.drop(columns=['geometry'])  # Drop the geometry column from the aggregated DataFrame
world_agg4['density'] = world_agg4['pop'] / world_agg4['area_km2']  # Calculate population density
world_agg4 = world_agg4.sort_values(by='n', ascending=False)  # Sort by number of countries ('n') in descending order
world_agg4 = world_agg4.head(3)  # Keep the three continents with the most countries
world_agg4

### 2.2.3 Vector attribute joining

In [None]:
# Left join: every row of `world` is kept, and the coffee columns are filled
# in where 'name_long' matches a row of `coffee_data`.
world_coffee = pd.merge(
    left=world,
    right=coffee_data,
    how='left',
    on='name_long',
)
world_coffee

In [None]:
# Two layers on one set of axes: a white base map with light grey borders,
# then the same layer coloured by 2017 coffee production drawn on top of it.
base = world_coffee.plot(edgecolor='lightgrey', color='white')
coffee_map = world_coffee.plot(column='coffee_production_2017', ax=base)

In [None]:
# Inner join: unlike the left join, only rows whose 'name_long' occurs in both
# `world` and `coffee_data` are kept.
pd.merge(world, coffee_data, on='name_long', how='inner')  # Inner-merge world GeoDataFrame with coffee data

### 2.2.4 Creating attributes and removing spatial information

In [None]:
# Work on a copy so that `world` itself is left untouched.
world2 = world.copy()
# New attribute: population divided by area, computed element-wise.
world2['pop_density'] = world2['pop'].div(world2['area_km2'])
world2


In [None]:
# Show the new 'pop_density' column next to the columns it was derived from.
world2[
    ['name_long', 'pop', 'area_km2', 'pop_density', 'geometry']
]

In [None]:
# Paste continent and region together into one 'con_reg' column, separated by ':'.
# Both operands come from `world2` itself, so the result does not depend on
# `world` and `world2` still sharing the same row index.
world2['con_reg'] = world2['continent'] + ':' + world2['region_un']
# The two source columns are now redundant; drop them.
world2 = world2.drop(columns=['continent', 'region_un'])
world2

In [None]:
# Reverse the concatenation: split 'con_reg' on ':' and assign the resulting
# columns (`expand=True` returns a table) back to 'continent' and 'region_un'.
world2[['continent', 'region_un']] = (
    world2['con_reg'].str.split(':', expand=True)
)
# Preview the first three rows of the affected columns.
(
    world2[['name_long', 'con_reg', 'continent', 'region_un', 'geometry']]
    .head(3)
)

In [None]:
# Rename 'name_long' to 'name', modifying `world2` in place.
world2.rename(inplace=True, columns={'name_long': 'name'})
# Preview the first three rows using the new column name.
(
    world2[['name', 'con_reg', 'continent', 'region_un', 'geometry']]
    .head(3)
)

In [None]:
# Rename every column at once by assigning a full list of names, matched by position.
# The list must contain exactly one name per existing column (13 here);
# pandas raises a ValueError on a length mismatch.
# NOTE(review): 'geom' sits in the ninth position, presumably the geometry
# column. Assigning `.columns` directly may leave the active geometry name
# pointing at the old label; confirm, or use `rename_geometry` instead.
new_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'geom', 'i', 'j', 'k', 'l']
world2.columns = new_names
world2

In [None]:
# Reorder the columns by selecting them in reverse alphabetical order.
names = list(world2.columns)
names.sort(reverse=True)
world2 = world2[names]
world2

In [None]:
# Inspect the column labels (still including 'geom' at this point).
world2.columns

In [None]:
# Remove the spatial information: drop the 'geom' column, then wrap the
# result in a plain pandas DataFrame.
world2 = pd.DataFrame(world2.drop(columns='geom'))

In [None]:
# Inspect the column labels again; 'geom' has been dropped by the previous cell.
world2.columns