# Band to band regression
This notebook reads the batch-exported GeoTIFF files of paired Landsat 7 and Landsat 8 images and converts the pixel values to a vaex DataFrame.
For each band, both an Ordinary Least Squares (OLS) regression model and a Reduced Major Axis (RMA) regression model are fitted.
RMA is calculated using the Python package pylr2 from https://github.com/OceanOptics/pylr2.

Users will need to change the input and output folders.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error #, r2_score
from scipy import stats
import seaborn as sns
import vaex as vx
from pylr2.regress2 import regress2
from geopyfsn import getBand

In [None]:
# Notebook-wide plot styling: seaborn dark grid with Arial, plus the three
# font-size presets (in points) that the plotting cells refer to.
sns.set_theme(style="darkgrid", font="Arial")
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 12, 14, 20

blue

In [None]:
# Load the paired blue-band pixel values and expose them as a vaex DataFrame
# with one column per sensor.
folderPath = r"/data/shunan/data/harmonize_data/201305_08landsat/blue"
L8, L7 = getBand(folderPath)
df = vx.from_pandas(pd.DataFrame({'L8': L8, 'L7': L7}))
# Drop the standalone array names; the data is reached through `df` from here on.
del L7, L8

In [None]:

# Blue band: fit OLS and RMA models of L8 against L7, draw them over a
# log-density heatmap of the paired pixels, save the figure and print the
# fit statistics.

# Pull each column out once instead of repeating the `.values` lookup for
# every call below.
l7_values = df.L7.values
l8_values = df.L8.values
n_pixels = len(l8_values)

# OLS with L7 (ETM+) as predictor, and the reverse fit with L8 (OLI) as
# predictor. The reverse fit is not drawn but its results stay available.
slope, intercept, r_value, p_value, std_err = stats.linregress(l7_values, l8_values)
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(l8_values, l7_values)

# Type II (reduced major axis) regression.
rma_results = regress2(l7_values, l8_values, _method_type_2="reduced major axis")
k = rma_results['slope']
b = rma_results['intercept']

# Font sizes have to be configured BEFORE the figure exists: rc settings only
# apply to artists created after they are set, so setting them after
# plt.subplots() leaves the first figure of a fresh kernel with other sizes.
plt.rc('font', size=BIGGER_SIZE)                                # default text size
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=BIGGER_SIZE)    # axes title, x/y labels
plt.rc('xtick', labelsize=BIGGER_SIZE)                          # x tick labels
plt.rc('ytick', labelsize=BIGGER_SIZE)                          # y tick labels
plt.rc('legend', fontsize=BIGGER_SIZE)                          # legend
plt.rc('figure', titlesize=BIGGER_SIZE)                         # figure title

fig, ax = plt.subplots(figsize=(8,7))

# Regression lines over the unit reflectance range.
unit_range = np.array([0, 1])
plt.plot(unit_range, slope * unit_range + intercept, color='red')  # OLS, ETM+ vs OLI
plt.plot([0, 1], [0, 1], color='white')                             # 1:1 reference line
plt.plot(unit_range, k * unit_range + b, color='black')            # RMA
plt.xlim(0, 1)
plt.ylim(0, 1)

ax.annotate('n:%d' % n_pixels, xy=(0.7, 0.1), xycoords='data',
            horizontalalignment='left', verticalalignment='top')

# Density heatmap of the pixel pairs. show=False so that the figure is not
# displayed until the aspect ratio is fixed and the file has been written;
# otherwise the displayed and the saved figure differ.
df.viz.heatmap(df.L7, df.L8, what=np.log(vx.stat.count()), show=False, colormap="viridis", vmin=0.0, vmax=6,
    xlabel='Blue L7 Surface Reflectance', ylabel='Blue L8 Surface Reflectance')
ax.set_aspect('equal', 'box')
fig.savefig('print/L7L8/Blue.jpg', dpi=300)
plt.show()

print('OLS: \ny={0:.4f}x+{1:.4f}\nOLS_r:{2:.2f}, p:{3:.3f}, n:{4:.0f}'.format(slope, intercept, r_value, p_value, n_pixels))
print('RMA: \ny={0:.4f}x+{1:.4f}\nRMA_r:{2:.2f}'.format(k, b, rma_results['r']))
# Root of the MSE taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
print('RMSE is %.4f' % np.sqrt(mean_squared_error(l8_values, l7_values)))

# Release the hoisted arrays so the cell adds no extra long-lived references.
del l7_values, l8_values

In [None]:
# sns.residplot(x=df.L7.values, y=df.L8.values)


In [None]:
# Overlay the L7 and L8 value histograms of the currently loaded band.
for sensor in ('L7', 'L8'):
    df.viz.histogram(x=df[sensor], label=sensor)
# df.viz.histogram(x=df.L7 * 1.1017 - 0.0084, label='L7adjusted')
plt.legend()

green

In [None]:
# Load the paired green-band pixel values and expose them as a vaex DataFrame
# with one column per sensor.
folderPath = r"/data/shunan/data/harmonize_data/201305_08landsat/green"
L8, L7 = getBand(folderPath)
df = vx.from_pandas(pd.DataFrame({'L8': L8, 'L7': L7}))
# Drop the standalone array names; the data is reached through `df` from here on.
del L7, L8


In [None]:
# Green band: fit OLS and RMA models of L8 against L7, draw them over a
# log-density heatmap of the paired pixels, save the figure and print the
# fit statistics.

# Pull each column out once instead of repeating the `.values` lookup for
# every call below.
l7_values = df.L7.values
l8_values = df.L8.values
n_pixels = len(l8_values)

# OLS with L7 (ETM+) as predictor, and the reverse fit with L8 (OLI) as
# predictor. The reverse fit is not drawn but its results stay available.
slope, intercept, r_value, p_value, std_err = stats.linregress(l7_values, l8_values)
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(l8_values, l7_values)

# Type II (reduced major axis) regression.
rma_results = regress2(l7_values, l8_values, _method_type_2="reduced major axis")
k = rma_results['slope']
b = rma_results['intercept']

# Font sizes have to be configured BEFORE the figure exists: rc settings only
# apply to artists created after they are set.
plt.rc('font', size=BIGGER_SIZE)                                # default text size
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=BIGGER_SIZE)    # axes title, x/y labels
plt.rc('xtick', labelsize=BIGGER_SIZE)                          # x tick labels
plt.rc('ytick', labelsize=BIGGER_SIZE)                          # y tick labels
plt.rc('legend', fontsize=BIGGER_SIZE)                          # legend
plt.rc('figure', titlesize=BIGGER_SIZE)                         # figure title

fig, ax = plt.subplots(figsize=(8,7))

# Regression lines over the unit reflectance range.
unit_range = np.array([0, 1])
plt.plot(unit_range, slope * unit_range + intercept, color='red')  # OLS, ETM+ vs OLI
plt.plot([0, 1], [0, 1], color='white')                             # 1:1 reference line
plt.plot(unit_range, k * unit_range + b, color='black')            # RMA
plt.xlim(0, 1)
plt.ylim(0, 1)

ax.annotate('n:%d' % n_pixels, xy=(0.7, 0.1), xycoords='data',
            horizontalalignment='left', verticalalignment='top')

# Density heatmap of the pixel pairs. show=False so that the figure is not
# displayed until the aspect ratio is fixed and the file has been written;
# otherwise the displayed and the saved figure differ.
df.viz.heatmap(df.L7, df.L8, what=np.log(vx.stat.count()), show=False, colormap="viridis", vmin=0.0, vmax=6,
    xlabel='Green L7 Surface Reflectance', ylabel='Green L8 Surface Reflectance')
ax.set_aspect('equal', 'box')
fig.savefig('print/L7L8/Green.jpg', dpi=300)
plt.show()

print('OLS: \ny={0:.4f}x+{1:.4f}\nOLS_r:{2:.2f}, p:{3:.3f}, n:{4:.0f}'.format(slope, intercept, r_value, p_value, n_pixels))
print('RMA: \ny={0:.4f}x+{1:.4f}\nRMA_r:{2:.2f}'.format(k, b, rma_results['r']))
# Root of the MSE taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
print('RMSE is %.4f' % np.sqrt(mean_squared_error(l8_values, l7_values)))

# Release the hoisted arrays so the cell adds no extra long-lived references.
del l7_values, l8_values


In [None]:
# sns.residplot(x=df.L7.values, y=df.L8.values)

In [None]:
# Overlay the L7 and L8 value histograms of the currently loaded band.
for sensor in ('L7', 'L8'):
    df.viz.histogram(x=df[sensor], label=sensor)
plt.legend()

red

In [None]:
# Load the paired red-band pixel values and expose them as a vaex DataFrame
# with one column per sensor.
folderPath = r"/data/shunan/data/harmonize_data/201305_08landsat/red"
L8, L7 = getBand(folderPath)
df = vx.from_pandas(pd.DataFrame({'L8': L8, 'L7': L7}))
# Drop the standalone array names; the data is reached through `df` from here on.
del L7, L8

In [None]:
# Red band: fit OLS and RMA models of L8 against L7, draw them over a
# log-density heatmap of the paired pixels, save the figure and print the
# fit statistics.

# Pull each column out once instead of repeating the `.values` lookup for
# every call below.
l7_values = df.L7.values
l8_values = df.L8.values
n_pixels = len(l8_values)

# OLS with L7 (ETM+) as predictor, and the reverse fit with L8 (OLI) as
# predictor. The reverse fit is not drawn but its results stay available.
slope, intercept, r_value, p_value, std_err = stats.linregress(l7_values, l8_values)
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(l8_values, l7_values)

# Type II (reduced major axis) regression.
rma_results = regress2(l7_values, l8_values, _method_type_2="reduced major axis")
k = rma_results['slope']
b = rma_results['intercept']

# Font sizes have to be configured BEFORE the figure exists: rc settings only
# apply to artists created after they are set.
plt.rc('font', size=BIGGER_SIZE)                                # default text size
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=BIGGER_SIZE)    # axes title, x/y labels
plt.rc('xtick', labelsize=BIGGER_SIZE)                          # x tick labels
plt.rc('ytick', labelsize=BIGGER_SIZE)                          # y tick labels
plt.rc('legend', fontsize=BIGGER_SIZE)                          # legend
plt.rc('figure', titlesize=BIGGER_SIZE)                         # figure title

fig, ax = plt.subplots(figsize=(8,7))

# Regression lines over the unit reflectance range.
unit_range = np.array([0, 1])
plt.plot(unit_range, slope * unit_range + intercept, color='red')  # OLS, ETM+ vs OLI
plt.plot([0, 1], [0, 1], color='white')                             # 1:1 reference line
plt.plot(unit_range, k * unit_range + b, color='black')            # RMA
plt.xlim(0, 1)
plt.ylim(0, 1)

ax.annotate('n:%d' % n_pixels, xy=(0.7, 0.1), xycoords='data',
            horizontalalignment='left', verticalalignment='top')

# Density heatmap of the pixel pairs. show=False so that the figure is not
# displayed until the aspect ratio is fixed and the file has been written;
# otherwise the displayed and the saved figure differ.
df.viz.heatmap(df.L7, df.L8, what=np.log(vx.stat.count()), show=False, colormap="viridis", vmin=0.0, vmax=6,
    xlabel='Red L7 Surface Reflectance', ylabel='Red L8 Surface Reflectance')
ax.set_aspect('equal', 'box')
fig.savefig('print/L7L8/Red.jpg', dpi=300)
plt.show()

print('OLS: \ny={0:.4f}x+{1:.4f}\nOLS_r:{2:.2f}, p:{3:.3f}, n:{4:.0f}'.format(slope, intercept, r_value, p_value, n_pixels))
print('RMA: \ny={0:.4f}x+{1:.4f}\nRMA_r:{2:.2f}'.format(k, b, rma_results['r']))
# Root of the MSE taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
print('RMSE is %.4f' % np.sqrt(mean_squared_error(l8_values, l7_values)))

# Release the hoisted arrays so the cell adds no extra long-lived references.
del l7_values, l8_values


In [None]:
# sns.residplot(x=df.L7.values, y=df.L8.values)

In [None]:
# Overlay the L7 and L8 value histograms of the currently loaded band.
for sensor in ('L7', 'L8'):
    df.viz.histogram(x=df[sensor], label=sensor)
plt.legend()

nir

In [None]:
# Load the paired NIR-band pixel values and expose them as a vaex DataFrame
# with one column per sensor.
folderPath = r"/data/shunan/data/harmonize_data/201305_08landsat/nir"
L8, L7 = getBand(folderPath)
df = vx.from_pandas(pd.DataFrame({'L8': L8, 'L7': L7}))
# Drop the standalone array names; the data is reached through `df` from here on.
del L7, L8

In [None]:
# NIR band: fit OLS and RMA models of L8 against L7, draw them over a
# log-density heatmap of the paired pixels, save the figure and print the
# fit statistics.

# Pull each column out once instead of repeating the `.values` lookup for
# every call below.
l7_values = df.L7.values
l8_values = df.L8.values
n_pixels = len(l8_values)

# OLS with L7 (ETM+) as predictor, and the reverse fit with L8 (OLI) as
# predictor. The reverse fit is not drawn but its results stay available.
slope, intercept, r_value, p_value, std_err = stats.linregress(l7_values, l8_values)
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(l8_values, l7_values)

# Type II (reduced major axis) regression.
rma_results = regress2(l7_values, l8_values, _method_type_2="reduced major axis")
k = rma_results['slope']
b = rma_results['intercept']

# Font sizes have to be configured BEFORE the figure exists: rc settings only
# apply to artists created after they are set.
plt.rc('font', size=BIGGER_SIZE)                                # default text size
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=BIGGER_SIZE)    # axes title, x/y labels
plt.rc('xtick', labelsize=BIGGER_SIZE)                          # x tick labels
plt.rc('ytick', labelsize=BIGGER_SIZE)                          # y tick labels
plt.rc('legend', fontsize=BIGGER_SIZE)                          # legend
plt.rc('figure', titlesize=BIGGER_SIZE)                         # figure title

fig, ax = plt.subplots(figsize=(8,7))

# Regression lines over the unit reflectance range.
unit_range = np.array([0, 1])
plt.plot(unit_range, slope * unit_range + intercept, color='red')  # OLS, ETM+ vs OLI
plt.plot([0, 1], [0, 1], color='white')                             # 1:1 reference line
plt.plot(unit_range, k * unit_range + b, color='black')            # RMA
plt.xlim(0, 1)
plt.ylim(0, 1)

ax.annotate('n:%d' % n_pixels, xy=(0.7, 0.1), xycoords='data',
            horizontalalignment='left', verticalalignment='top')

# Density heatmap of the pixel pairs. show=False so that the figure is not
# displayed until the aspect ratio is fixed and the file has been written;
# otherwise the displayed and the saved figure differ.
df.viz.heatmap(df.L7, df.L8, what=np.log(vx.stat.count()), show=False, colormap="viridis", vmin=0.0, vmax=6,
    xlabel='NIR L7 Surface Reflectance', ylabel='NIR L8 Surface Reflectance')
ax.set_aspect('equal', 'box')
fig.savefig(r'print/L7L8/Nir.jpg', dpi=300)
plt.show()

print('OLS: \ny={0:.4f}x+{1:.4f}\nOLS_r:{2:.2f}, p:{3:.3f}, n:{4:.0f}'.format(slope, intercept, r_value, p_value, n_pixels))
print('RMA: \ny={0:.4f}x+{1:.4f}\nRMA_r:{2:.2f}'.format(k, b, rma_results['r']))
# Root of the MSE taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
print('RMSE is %.4f' % np.sqrt(mean_squared_error(l8_values, l7_values)))

# Release the hoisted arrays so the cell adds no extra long-lived references.
del l7_values, l8_values


In [None]:
# sns.residplot(x=df.L7.values, y=df.L8.values)

In [None]:
# Overlay the L7 and L8 value histograms of the currently loaded band.
for sensor in ('L7', 'L8'):
    df.viz.histogram(x=df[sensor], label=sensor)
plt.legend()

swir1

In [None]:
# Load the paired SWIR1-band pixel values and expose them as a vaex DataFrame
# with one column per sensor.
folderPath = r"/data/shunan/data/harmonize_data/201305_08landsat/swir1"
L8, L7 = getBand(folderPath)
df = vx.from_pandas(pd.DataFrame({'L8': L8, 'L7': L7}))
# Drop the standalone array names; the data is reached through `df` from here on.
del L7, L8

In [None]:
# SWIR1 band: fit OLS and RMA models of L8 against L7, draw them over a
# log-density heatmap of the paired pixels, save the figure and print the
# fit statistics.

# Pull each column out once instead of repeating the `.values` lookup for
# every call below.
l7_values = df.L7.values
l8_values = df.L8.values
n_pixels = len(l8_values)

# OLS with L7 (ETM+) as predictor, and the reverse fit with L8 (OLI) as
# predictor. The reverse fit is not drawn but its results stay available.
slope, intercept, r_value, p_value, std_err = stats.linregress(l7_values, l8_values)
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(l8_values, l7_values)

# Type II (reduced major axis) regression.
rma_results = regress2(l7_values, l8_values, _method_type_2="reduced major axis")
k = rma_results['slope']
b = rma_results['intercept']

# Font sizes have to be configured BEFORE the figure exists: rc settings only
# apply to artists created after they are set.
plt.rc('font', size=BIGGER_SIZE)                                # default text size
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=BIGGER_SIZE)    # axes title, x/y labels
plt.rc('xtick', labelsize=BIGGER_SIZE)                          # x tick labels
plt.rc('ytick', labelsize=BIGGER_SIZE)                          # y tick labels
plt.rc('legend', fontsize=BIGGER_SIZE)                          # legend
plt.rc('figure', titlesize=BIGGER_SIZE)                         # figure title

fig, ax = plt.subplots(figsize=(8,7))

# Regression lines over the unit reflectance range.
unit_range = np.array([0, 1])
plt.plot(unit_range, slope * unit_range + intercept, color='red')  # OLS, ETM+ vs OLI
plt.plot([0, 1], [0, 1], color='white')                             # 1:1 reference line
plt.plot(unit_range, k * unit_range + b, color='black')            # RMA
plt.xlim(0, 1)
plt.ylim(0, 1)

ax.annotate('n:%d' % n_pixels, xy=(0.7, 0.1), xycoords='data',
            horizontalalignment='left', verticalalignment='top')

# Density heatmap of the pixel pairs. show=False so that the figure is not
# displayed until the aspect ratio is fixed and the file has been written;
# otherwise the displayed and the saved figure differ.
df.viz.heatmap(df.L7, df.L8, what=np.log(vx.stat.count()), show=False, colormap="viridis", vmin=0.0, vmax=6,
    xlabel='SWIR1 L7 Surface Reflectance', ylabel='SWIR1 L8 Surface Reflectance')
ax.set_aspect('equal', 'box')
fig.savefig('print/L7L8/SWIR1.jpg', dpi=300)
plt.show()

print('OLS: \ny={0:.4f}x+{1:.4f}\nOLS_r:{2:.2f}, p:{3:.3f}, n:{4:.0f}'.format(slope, intercept, r_value, p_value, n_pixels))
print('RMA: \ny={0:.4f}x+{1:.4f}\nRMA_r:{2:.2f}'.format(k, b, rma_results['r']))
# Root of the MSE taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
print('RMSE is %.4f' % np.sqrt(mean_squared_error(l8_values, l7_values)))

# Release the hoisted arrays so the cell adds no extra long-lived references.
del l7_values, l8_values


In [None]:
# sns.residplot(x=df.L7.values, y=df.L8.values)

In [None]:
# Overlay the L7 and L8 value histograms of the currently loaded band.
for sensor in ('L7', 'L8'):
    df.viz.histogram(x=df[sensor], label=sensor)
plt.legend()

swir2

In [None]:
# Load the paired SWIR2-band pixel values and expose them as a vaex DataFrame
# with one column per sensor.
folderPath = r"/data/shunan/data/harmonize_data/201305_08landsat/swir2"
L8, L7 = getBand(folderPath)
df = vx.from_pandas(pd.DataFrame({'L8': L8, 'L7': L7}))
# Drop the standalone array names; the data is reached through `df` from here on.
del L7, L8

In [None]:
# SWIR2 band: fit OLS and RMA models of L8 against L7, draw them over a
# log-density heatmap of the paired pixels, save the figure and print the
# fit statistics.

# Pull each column out once instead of repeating the `.values` lookup for
# every call below.
l7_values = df.L7.values
l8_values = df.L8.values
n_pixels = len(l8_values)

# OLS with L7 (ETM+) as predictor, and the reverse fit with L8 (OLI) as
# predictor. The reverse fit is not drawn but its results stay available.
slope, intercept, r_value, p_value, std_err = stats.linregress(l7_values, l8_values)
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(l8_values, l7_values)

# Type II (reduced major axis) regression.
rma_results = regress2(l7_values, l8_values, _method_type_2="reduced major axis")
k = rma_results['slope']
b = rma_results['intercept']

# Font sizes have to be configured BEFORE the figure exists: rc settings only
# apply to artists created after they are set.
plt.rc('font', size=BIGGER_SIZE)                                # default text size
plt.rc('axes', titlesize=BIGGER_SIZE, labelsize=BIGGER_SIZE)    # axes title, x/y labels
plt.rc('xtick', labelsize=BIGGER_SIZE)                          # x tick labels
plt.rc('ytick', labelsize=BIGGER_SIZE)                          # y tick labels
plt.rc('legend', fontsize=BIGGER_SIZE)                          # legend
plt.rc('figure', titlesize=BIGGER_SIZE)                         # figure title

fig, ax = plt.subplots(figsize=(8,7))

# Regression lines over the unit reflectance range.
unit_range = np.array([0, 1])
plt.plot(unit_range, slope * unit_range + intercept, color='red')  # OLS, ETM+ vs OLI
plt.plot([0, 1], [0, 1], color='white')                             # 1:1 reference line
plt.plot(unit_range, k * unit_range + b, color='black')            # RMA
plt.xlim(0, 1)
plt.ylim(0, 1)

ax.annotate('n:%d' % n_pixels, xy=(0.7, 0.1), xycoords='data',
            horizontalalignment='left', verticalalignment='top')

# Density heatmap of the pixel pairs. show=False so that the figure is not
# displayed until the aspect ratio is fixed and the file has been written;
# otherwise the displayed and the saved figure differ.
df.viz.heatmap(df.L7, df.L8, what=np.log(vx.stat.count()), show=False, colormap="viridis", vmin=0.0, vmax=6,
    xlabel='SWIR2 L7 Surface Reflectance', ylabel='SWIR2 L8 Surface Reflectance')
ax.set_aspect('equal', 'box')
fig.savefig('print/L7L8/SWIR2.jpg', dpi=300)
plt.show()

print('OLS: \ny={0:.4f}x+{1:.4f}\nOLS_r:{2:.2f}, p:{3:.3f}, n:{4:.0f}'.format(slope, intercept, r_value, p_value, n_pixels))
print('RMA: \ny={0:.4f}x+{1:.4f}\nRMA_r:{2:.2f}'.format(k, b, rma_results['r']))
# Root of the MSE taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from recent scikit-learn.
print('RMSE is %.4f' % np.sqrt(mean_squared_error(l8_values, l7_values)))

# Release the hoisted arrays so the cell adds no extra long-lived references.
del l7_values, l8_values


In [None]:
# sns.residplot(x=df.L7.values, y=df.L8.values)

In [None]:
# Overlay the L7 and L8 value histograms of the currently loaded band.
for sensor in ('L7', 'L8'):
    df.viz.histogram(x=df[sensor], label=sensor)
plt.legend()