# Reverse Engineering of the Concrete Strength
This is as simple as retriving the datapoint for a given Target ('strength') value

In [1]:
import pandas as pd
import numpy as np

## given the following dataset

In [2]:
data = pd.read_csv('concrete.csv')

## Let's retrieve the datapoint for a given Target ('strength') value, say strength = 29.00

In [3]:
data[data['strength']==29]

Unnamed: 0,cement,slag,ash,water,superplastic,coarseagg,fineagg,age,strength
504,375.0,93.8,0.0,126.6,23.4,852.1,992.6,3,29.0


## that's good enough so let's create a function around it

In [4]:
def retrieve_features_value(data,Target,value):
    return data[data[Target]==value]

In [5]:
#Let's use the function if we are not satisfied then we can continue to reverve engineer
Target = 'strength'
retrieve_features_value(data,Target,29)

Unnamed: 0,cement,slag,ash,water,superplastic,coarseagg,fineagg,age,strength
504,375.0,93.8,0.0,126.6,23.4,852.1,992.6,3,29.0


## That's it! but let's try strength = 28

In [6]:
retrieve_features_value(data,Target,28)

Unnamed: 0,cement,slag,ash,water,superplastic,coarseagg,fineagg,age,strength


Nothing showed up hmmmmm there are gaps in the continuity of the observed dataset as it is a sample of the population, may be we can predict these gaps by fitting a model with the observed dataset to generate a dataset as close to population as possible

## Re-generate the Target 'strength' as close to population as possible rounded off to 3 decimal places:

In [7]:
#let us re-generate the 'strength' to be continous within 3 decimal places as close to population as possible to 3 decimal places:
rev_Target = 'strength'
max_population = data[rev_Target].max()
min_population = data[rev_Target].min()
Target_population = np.arange(min_population,max_population,0.001)
Target_population = pd.DataFrame(np.arange(min_population,max_population,0.001).round(3), columns=[rev_Target])
Target_population

Unnamed: 0,strength
0,2.330
1,2.331
2,2.332
3,2.333
4,2.334
...,...
80265,82.595
80266,82.596
80267,82.597
80268,82.598


## Now let us predict the independent variables of the above Target_population

In [8]:
#So now we have to train a model in a reversed manner from the observed origanal data and use the model to predict all the data points in our Target_population above. 
from sklearn.linear_model import LinearRegression

#notice the reversal of former feature variable 'age' now used as the Target variable (y) vice versa
x = data[['strength']]
y = data[['age']]
reg = LinearRegression()
reg.fit(x,y)
age_predicted = reg.predict(Target_population)
age_predicted = pd.DataFrame(reg.predict(age_predicted), columns=['age'])
age_predicted

Unnamed: 0,age
0,6.115529
1,6.117075
2,6.118622
3,6.120168
4,6.121715
...,...
80265,130.244070
80266,130.245617
80267,130.247163
80268,130.248710


## Now can we join the age_predicted and the Target_population into one dataframe?

In [9]:
T_p =Target_population.copy() #make a copy to prevent modification of Target_population at this point
T_p['age'] = age_predicted
T_p

Unnamed: 0,strength,age
0,2.330,6.115529
1,2.331,6.117075
2,2.332,6.118622
3,2.333,6.120168
4,2.334,6.121715
...,...,...
80265,82.595,130.244070
80266,82.596,130.245617
80267,82.597,130.247163
80268,82.598,130.248710


## Let us use function call to generate the rest of the variables

In [10]:
def reversed_prediction(data, Target):
    global Target_population
    x = data[[Target]]
    reg = LinearRegression()
    T_p =Target_population.copy() 
    for var in data.columns:
        if var != Target:
            y = data[[var]]
            reg.fit(x,y)
            age_predicted = reg.predict(T_p)
            age_predicted = pd.DataFrame(reg.predict(age_predicted), columns=[var])
            age_predicted
            Target_population[var] = age_predicted
    return Target_population

In [11]:
reverse_eng_data = reversed_prediction(data,Target)
reverse_eng_data

Unnamed: 0,strength,cement,slag,ash,water,superplastic,coarseagg,fineagg,age
0,2.330,720.465675,84.172955,41.249563,123.017045,1.754205,233.810160,159.847171,6.115529
1,2.331,720.475374,84.173440,41.249727,123.017182,1.754222,233.810749,159.847815,6.117075
2,2.332,720.485073,84.173925,41.249891,123.017319,1.754239,233.811339,159.848460,6.118622
3,2.333,720.494772,84.174409,41.250055,123.017456,1.754256,233.811928,159.849104,6.120168
4,2.334,720.504471,84.174894,41.250219,123.017593,1.754273,233.812517,159.849748,6.121715
...,...,...,...,...,...,...,...,...,...
80265,82.595,1498.942395,123.093295,54.423485,134.018732,3.129675,281.110391,211.556634,130.244070
80266,82.596,1498.952094,123.093780,54.423649,134.018869,3.129692,281.110980,211.557278,130.245617
80267,82.597,1498.961793,123.094265,54.423813,134.019006,3.129709,281.111569,211.557922,130.247163
80268,82.598,1498.971491,123.094750,54.423977,134.019143,3.129726,281.112159,211.558566,130.248710


## So now we can retrieve the feature values where strength is 28

In [12]:
retrieve_features_value(reverse_eng_data,Target,28)

Unnamed: 0,strength,cement,slag,ash,water,superplastic,coarseagg,fineagg,age
25670,28.0,969.434683,96.620287,45.462789,126.535556,2.194101,248.937512,176.384665,45.813774


## Not only strength of 28 but of any within 3 decimal place round off

In [13]:
retrieve_features_value(reverse_eng_data,Target,28.2)

Unnamed: 0,strength,cement,slag,ash,water,superplastic,coarseagg,fineagg,age
25870,28.2,971.374449,96.717267,45.495615,126.562969,2.197529,249.055372,176.513511,46.123071


In [14]:
retrieve_features_value(reverse_eng_data,Target,50.123)

Unnamed: 0,strength,cement,slag,ash,water,superplastic,coarseagg,fineagg,age
47793,50.123,1184.001935,107.347686,49.093844,129.56789,2.573215,261.974614,190.637059,80.026641
