# Pandas Tutorial - Part 48

This notebook covers various Series methods including:
- Calculating skewness with `skew()`
- Shifting data without copying with `slice_shift()` (deprecated in pandas 1.2 and removed in pandas 2.0)
- Converting Series to JSON with `to_json()`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from scipy import stats

%matplotlib inline

## Calculating Skewness

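The `skew()` method calculates the unbiased skewness of a Series. Skewness is a measure of the asymmetry of the probability distribution of a random variable about its mean. A value near 0 indicates a roughly symmetric distribution, a positive value indicates a longer right tail, and a negative value indicates a longer left tail.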

In [None]:
# Draw 1000 samples from a standard normal distribution (seeded so the draw is repeatable)
np.random.seed(42)
normal_samples = np.random.normal(loc=0, scale=1, size=1000)
normal_data = pd.Series(normal_samples)

# Sample skewness of the draws
normal_skew = normal_data.skew()
print(f"Skewness of normal distribution: {normal_skew}")

In [None]:
# Exponential samples have a long tail on the right-hand side
exponential_samples = np.random.exponential(scale=1, size=1000)
right_skewed = pd.Series(exponential_samples)

# Sample skewness of the draws
right_skew = right_skewed.skew()
print(f"Skewness of right-skewed distribution: {right_skew}")

In [None]:
# Mirror exponential samples around 10 so the long tail points to the left
mirrored_samples = 10 - np.random.exponential(scale=1, size=1000)
left_skewed = pd.Series(mirrored_samples)

# Sample skewness of the draws
left_skew = left_skewed.skew()
print(f"Skewness of left-skewed distribution: {left_skew}")

In [None]:
# Visualize the three distributions, one histogram per row.
# Each panel marks the sample mean with a dashed red line so the direction of
# the long tail relative to the mean is easy to see.
panels = [
    ('Normal', normal_data, normal_skew),
    ('Right-Skewed', right_skewed, right_skew),
    ('Left-Skewed', left_skewed, left_skew),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(10, 12))

# One loop instead of three copy-pasted blocks: every panel is drawn the same way.
for ax, (label, data, skewness) in zip(axes, panels):
    ax.hist(data, bins=30, alpha=0.7)
    ax.axvline(data.mean(), color='r', linestyle='dashed', linewidth=2, label='Mean')
    ax.set_title(f'{label} Distribution (Skewness: {skewness:.4f})')
    # Axis labels and a legend so each panel can be read on its own
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Ten-element Series in which two of the entries are NaN
values_with_gaps = [1, 2, 3, np.nan, 5, 6, np.nan, 8, 9, 10]
data_with_nan = pd.Series(values_with_gaps)
print("Series with missing values:")
print(data_with_nan)

In [None]:
# skipna=True is the default; it is spelled out here to contrast with the next cell
skew_skipna = data_with_nan.skew(skipna=True)
print(f"Skewness with skipna=True: {skew_skipna}")

In [None]:
# Same calculation, but with NaN entries kept in the input instead of skipped
keep_missing = dict(skipna=False)
skew_no_skipna = data_with_nan.skew(**keep_missing)
print(f"Skewness with skipna=False: {skew_no_skipna}")

## Shifting Data Without Copying

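The `slice_shift()` method shifts the data without copying. Unlike `shift()`, which returns a Series of the same length with NaN values, `slice_shift()` returns a smaller Series without the dropped periods. Note that `slice_shift()` was deprecated in pandas 1.2 and removed in pandas 2.0, so calling it directly requires pandas < 2.0; pandas recommends `shift()` instead, and the same shortened result can be produced by slicing with `iloc` and relabeling with `set_axis`.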

In [None]:
# Small integer Series (1 through 5) used by the shifting examples
s = pd.Series(list(range(1, 6)))
print("Original Series:")
print(s)

In [None]:
# shift() keeps the original length; the vacated leading positions become NaN
shift_by = 2
s_shift = s.shift(shift_by)
print("Series shifted by 2 periods using shift():")
print(s_shift)

In [None]:
# Shift using slice_shift() semantics.
# Series.slice_shift() was deprecated in pandas 1.2 and removed in pandas 2.0,
# so calling it raises AttributeError on current pandas. The same result is
# built explicitly here: drop the last `periods` values, then relabel what is
# left with the index labels that start `periods` positions later.
periods = 2
s_slice_shift = s.iloc[:-periods].set_axis(s.index[periods:])
print("Series shifted by 2 periods using slice_shift():")
print(s_slice_shift)

In [None]:
# A negative period moves values toward the start; the trailing positions become NaN
shift_by = -2
s_neg_shift = s.shift(shift_by)
print("Series shifted by -2 periods using shift():")
print(s_neg_shift)

In [None]:
# Negative shift using slice_shift() semantics.
# Series.slice_shift() was deprecated in pandas 1.2 and removed in pandas 2.0,
# so the equivalent is built explicitly: drop the first |periods| values, then
# relabel what is left with the index labels that end |periods| positions earlier.
periods = -2
s_neg_slice_shift = s.iloc[-periods:].set_axis(s.index[:periods])
print("Series shifted by -2 periods using slice_shift():")
print(s_neg_slice_shift)

In [None]:
# Two-column frame used to show the same shifting behaviour on a DataFrame
column_values = {
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
}
df = pd.DataFrame(column_values)
print("Original DataFrame:")
print(df)

In [None]:
# Shift DataFrame using slice_shift() semantics.
# DataFrame.slice_shift() was deprecated in pandas 1.2 and removed in pandas 2.0,
# so the equivalent is built explicitly: drop the last `periods` rows, then
# relabel the remaining rows with the index labels that start `periods`
# positions later.
periods = 2
df_slice_shift = df.iloc[:-periods].set_axis(df.index[periods:], axis=0)
print("DataFrame shifted by 2 periods using slice_shift():")
print(df_slice_shift)

## Converting Series to JSON

The `to_json()` method converts a Series to a JSON string.

In [None]:
# String Series with letter labels as its index, used by the to_json() examples
s = pd.Series(list('abcd'), index=list('wxyz'))
print("Original Series:")
print(s)

In [None]:
# With no orient argument a Series is serialized with orient='index'
json_index = s.to_json()
print("JSON with orient='index' (default):")
print(json_index)

# Round-trip through the json module to get an indented rendering
parsed_index = json.loads(json_index)
print("\nPretty printed JSON:")
print(json.dumps(parsed_index, indent=4))

In [None]:
# Serialize the Series using the 'split' orientation
json_split = s.to_json(orient='split')
print("JSON with orient='split':")
print(json_split)

# Round-trip through the json module to get an indented rendering
parsed_split = json.loads(json_split)
print("\nPretty printed JSON:")
print(json.dumps(parsed_split, indent=4))

In [None]:
# Serialize the Series using the 'records' orientation
json_records = s.to_json(orient='records')
print("JSON with orient='records':")
print(json_records)

# Round-trip through the json module to get an indented rendering
parsed_records = json.loads(json_records)
print("\nPretty printed JSON:")
print(json.dumps(parsed_records, indent=4))

In [None]:
# Serialize the Series using the 'values' orientation
json_values = s.to_json(orient='values')
print("JSON with orient='values':")
print(json_values)

# Round-trip through the json module to get an indented rendering
parsed_values = json.loads(json_values)
print("\nPretty printed JSON:")
print(json.dumps(parsed_values, indent=4))

In [None]:
# Serialize the Series using the 'table' orientation
json_table = s.to_json(orient='table')
print("JSON with orient='table':")
print(json_table)

# Round-trip through the json module to get an indented rendering
parsed_table = json.loads(json_table)
print("\nPretty printed JSON:")
print(json.dumps(parsed_table, indent=4))

In [None]:
# One Series holding an int, a float, a string, a bool and a Timestamp
mixed_values = [1, 2.5, 'a', True, pd.Timestamp('2023-01-01')]
s_mixed = pd.Series(mixed_values)
print("Series with mixed data types:")
print(s_mixed)

In [None]:
# Serialize the mixed-type Series with the default settings
json_mixed = s_mixed.to_json()
print("JSON for Series with mixed data types:")
print(json_mixed)

# Round-trip through the json module to get an indented rendering
parsed_mixed = json.loads(json_mixed)
print("\nPretty printed JSON:")
print(json.dumps(parsed_mixed, indent=4))

In [None]:
# Four consecutive daily timestamps starting 2023-01-01 serve as the index
daily_index = pd.date_range('2023-01-01', periods=4)
date_s = pd.Series([1, 2, 3, 4], index=daily_index)
print("Series with datetime index:")
print(date_s)

In [None]:
# No date_format argument: the datetime index is written using the default ('epoch')
json_date_epoch = date_s.to_json()
print("JSON with default date_format ('epoch'):")
print(json_date_epoch)

# Round-trip through the json module to get an indented rendering
parsed_date_epoch = json.loads(json_date_epoch)
print("\nPretty printed JSON:")
print(json.dumps(parsed_date_epoch, indent=4))

In [None]:
# Request ISO-formatted dates instead of the default
json_date_iso = date_s.to_json(date_format='iso')
print("JSON with date_format='iso':")
print(json_date_iso)

# Round-trip through the json module to get an indented rendering
parsed_date_iso = json.loads(json_date_iso)
print("\nPretty printed JSON:")
print(json.dumps(parsed_date_iso, indent=4))

In [None]:
# Serialize the same Series once per date_unit: seconds, milliseconds (the default),
# microseconds and nanoseconds
json_date_s, json_date_ms, json_date_us, json_date_ns = (
    date_s.to_json(date_unit=unit) for unit in ('s', 'ms', 'us', 'ns')
)

print("JSON with date_unit='s':")
print(json_date_s)

print("\nJSON with date_unit='ms' (default):")
print(json_date_ms)

print("\nJSON with date_unit='us':")
print(json_date_us)

print("\nJSON with date_unit='ns':")
print(json_date_ns)

In [None]:
# Let to_json() do the indentation itself instead of going through json.dumps
indent_width = 4
json_indent = s.to_json(indent=indent_width)
print("JSON with indent=4:")
print(json_indent)

## Conclusion

In this notebook, we've explored various Series methods in pandas:

1. Calculating skewness with `skew()`, which measures the asymmetry of the probability distribution of a Series.
2. Shifting data without copying with `slice_shift()`, which returns a smaller Series without the dropped periods (note that this method was deprecated in pandas 1.2 and removed in pandas 2.0).
3. Converting Series to JSON with `to_json()`, which provides various options for formatting and encoding.

These methods are essential tools for data manipulation, analysis, and serialization in pandas, allowing for flexible and powerful operations on your data.