# [Shortest superstring](https://leetcode.com/problems/find-the-shortest-superstring/)

- **Given**
    + A list of `words`
- **Find**: The shortest string that
    + contains each string in `words` as a substring

#### Examples

```
Input: words = ["alex","loves","leetcode"]
Output: "alexlovesleetcode"

Input: words = ["catg","ctaagt","gcta","ttca","atgcatc"]
Output: "gctaagttcatgcatc"
```

#### Constraints
- $1 \leq \text{words.length} \leq 12$
- $1 \leq \text{words[i].length} \leq 20$


# Solution
- Reduce the problem to TSP
    + Each `words[i]` is a vertex
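    + edge weight of `s1 -> s2` = the number of characters of `s2` that still have to be appended after `s1`, i.e. `len(s2)` minus the longest suffix of `s1` that is also a prefix of `s2`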

<img src="./img/1.png" width="800"/>

#### DP bitmask - TSP - $O(2^N N^2)$

```Cpp
/**
 * Shortest common superstring, solved as a shortest Hamiltonian path with a
 * bitmask DP over (visited set, last word) -- O(2^N * N^2) time, O(2^N * N) memory.
 *
 * Every word is a vertex; the weight of edge a -> b is the number of characters
 * of b that must be appended once a has been written (see appendCost).
 */
class Solution {
private:
    /**
     * Number of characters of `next` that have to be appended when `next` is
     * placed directly after `prev`.
     *
     * Looks for the longest suffix of `prev` that is also a prefix of `next`
     * and returns `next.size()` minus that overlap (so `next.size()` when the
     * two words do not overlap at all).
     */
    static int appendCost(const std::string& prev, const std::string& next) {
        // An overlap can never be longer than the shorter of the two words.
        const std::size_t limit = prev.size() < next.size() ? prev.size() : next.size();
        for (std::size_t overlap = limit; overlap > 0; --overlap) {
            // Compare in place: no temporary substring per probe.
            if (prev.compare(prev.size() - overlap, overlap, next, 0, overlap) == 0) {
                return static_cast<int>(next.size() - overlap);
            }
        }
        return static_cast<int>(next.size());
    }

    /**
     * Returns `words` without the entries that are redundant for a superstring:
     * words contained in another word, and every copy of a duplicate except the
     * first. Relative order of the surviving words is preserved.
     *
     * The pairwise-overlap model below is only optimal when no word is
     * contained in another, so this filter makes the solver correct for
     * arbitrary input.
     */
    static std::vector<std::string> dropContained(const std::vector<std::string>& words) {
        std::vector<std::string> kept;
        for (std::size_t i = 0; i < words.size(); ++i) {
            bool redundant = false;
            for (std::size_t j = 0; j < words.size() && !redundant; ++j) {
                if (i == j) continue;
                const bool inside = words[j].find(words[i]) != std::string::npos;
                const bool duplicate = words[i] == words[j];
                // Equal words contain each other: only the later copy is dropped.
                redundant = inside && (!duplicate || j < i);
            }
            if (!redundant) kept.push_back(words[i]);
        }
        return kept;
    }

public:
    /**
     * @param words  words that must all occur as substrings of the result
     *               (not modified).
     * @return one shortest string containing every word; the empty string when
     *         `words` is empty. When several shortest strings exist, any one
     *         of them may be returned.
     *
     * The tables hold N * 2^N ints, so this is meant for small N (the problem
     * statement above bounds it by 12).
     */
    std::string shortestSuperstring(std::vector<std::string>& words) {
        const std::vector<std::string> pool = dropContained(words);
        if (pool.empty()) return std::string();
        if (pool.size() == 1) return pool.front();

        const int n = static_cast<int>(pool.size());
        const int full = (1 << n) - 1;

        // cost[a][b]: characters added when b directly follows a.
        std::vector<std::vector<int>> cost(n, std::vector<int>(n, 0));
        for (int a = 0; a < n; ++a) {
            for (int b = 0; b < n; ++b) {
                if (a != b) cost[a][b] = appendCost(pool[a], pool[b]);
            }
        }

        // best[last][mask]: shortest length of a string that covers exactly the
        // words in `mask` and ends with word `last`.
        // parent[last][mask]: word written just before `last` on that optimum.
        std::vector<std::vector<int>> best(n, std::vector<int>(full + 1, INT_MAX));
        std::vector<std::vector<int>> parent(n, std::vector<int>(full + 1, -1));
        for (int start = 0; start < n; ++start) {
            best[start][1 << start] = static_cast<int>(pool[start].size());
        }

        // Extending a mask always yields a numerically larger mask, so a plain
        // ascending sweep sees every state after all of its predecessors.
        for (int mask = 1; mask < full; ++mask) {
            for (int last = 0; last < n; ++last) {
                if ((mask & (1 << last)) == 0) continue;
                const int soFar = best[last][mask];
                if (soFar == INT_MAX) continue;  // unreachable state, nothing to extend

                for (int next = 0; next < n; ++next) {
                    if ((mask & (1 << next)) != 0) continue;
                    const int grown = mask | (1 << next);
                    const int candidate = soFar + cost[last][next];
                    if (candidate < best[next][grown]) {
                        best[next][grown] = candidate;
                        parent[next][grown] = last;
                    }
                }
            }
        }

        // The optimum ends at whichever word gives the shortest full cover.
        int tail = 0;
        for (int city = 1; city < n; ++city) {
            if (best[city][full] < best[tail][full]) tail = city;
        }

        // Walk the parent links backwards, filling the visiting order from the end.
        std::vector<int> order(n, -1);
        int mask = full;
        int city = tail;
        for (int pos = n - 1; pos >= 0; --pos) {
            order[pos] = city;
            const int before = parent[city][mask];
            mask &= ~(1 << city);
            city = before;
        }

        // Emit the first word whole, then only the non-overlapping tail of each
        // following word.
        std::string answer = pool[order[0]];
        for (int pos = 1; pos < n; ++pos) {
            const std::string& word = pool[order[pos]];
            const int added = cost[order[pos - 1]][order[pos]];
            answer.append(word, word.size() - added, std::string::npos);
        }
        return answer;
    }
};
```

#### bfs/dfs TSP with pruning

```Cpp
/**
 * Shortest common superstring, solved as a shortest Hamiltonian path explored
 * depth-first with branch-and-bound pruning.
 *
 * Every word is a vertex; the weight of edge a -> b is the number of characters
 * of b that must be appended once a has been written (see appendCost).
 *
 * A search state is the pair (set of words already placed, last word placed).
 * The last word is part of the state on purpose: what can still be saved
 * depends on which word the partial string ends with, so two partial strings
 * covering the same set but ending differently must not replace each other.
 */
class Solution {
private:
    /**
     * Number of characters of `next` that have to be appended when `next` is
     * placed directly after `prev`.
     *
     * Looks for the longest suffix of `prev` that is also a prefix of `next`
     * and returns `next.size()` minus that overlap (so `next.size()` when the
     * two words do not overlap at all).
     */
    static int appendCost(const std::string& prev, const std::string& next) {
        // An overlap can never be longer than the shorter of the two words.
        const std::size_t limit = prev.size() < next.size() ? prev.size() : next.size();
        for (std::size_t overlap = limit; overlap > 0; --overlap) {
            // Compare in place: no temporary substring per probe.
            if (prev.compare(prev.size() - overlap, overlap, next, 0, overlap) == 0) {
                return static_cast<int>(next.size() - overlap);
            }
        }
        return static_cast<int>(next.size());
    }

    /**
     * Returns `words` without the entries that are redundant for a superstring:
     * words contained in another word, and every copy of a duplicate except the
     * first. Relative order of the surviving words is preserved.
     *
     * The pairwise-overlap model is only optimal when no word is contained in
     * another, so this filter makes the solver correct for arbitrary input.
     */
    static std::vector<std::string> dropContained(const std::vector<std::string>& words) {
        std::vector<std::string> kept;
        for (std::size_t i = 0; i < words.size(); ++i) {
            bool redundant = false;
            for (std::size_t j = 0; j < words.size() && !redundant; ++j) {
                if (i == j) continue;
                const bool inside = words[j].find(words[i]) != std::string::npos;
                const bool duplicate = words[i] == words[j];
                // Equal words contain each other: only the later copy is dropped.
                redundant = inside && (!duplicate || j < i);
            }
            if (!redundant) kept.push_back(words[i]);
        }
        return kept;
    }

    // --- problem data (rebuilt on every call) ---
    int n_ = 0;                            // number of words after filtering
    int full_ = 0;                         // bitmask with all n_ bits set
    std::vector<std::string> pool_;        // filtered words
    std::vector<std::vector<int>> cost_;   // cost_[a][b] = appendCost(pool_[a], pool_[b])

    // --- search state ---
    std::vector<std::vector<int>> seen_;   // seen_[last][mask]: shortest length that reached the state so far
    int bestLen_ = INT_MAX;                // length of the best complete order found so far
    std::vector<int> route_;               // order of the partial string currently being explored
    std::vector<int> bestRoute_;           // order that produced bestLen_

    /**
     * Depth-first extension of the partial string described by `route_`.
     *
     * @param mask    bitmask of the words already placed
     * @param last    index of the word placed most recently
     * @param length  length of the partial string built so far
     */
    void explore(int mask, int last, int length) {
        // Bound: costs are non-negative, so this branch cannot end up strictly
        // shorter than the incumbent.
        if (length >= bestLen_) return;
        // Dominance: the same (mask, last) was already reached at least as
        // cheaply, so every completion from here was covered by that visit.
        if (length >= seen_[last][mask]) return;
        seen_[last][mask] = length;

        if (mask == full_) {
            bestLen_ = length;
            bestRoute_ = route_;
            return;
        }

        for (int next = 0; next < n_; ++next) {
            if ((mask & (1 << next)) != 0) continue;
            route_.push_back(next);
            explore(mask | (1 << next), next, length + cost_[last][next]);
            route_.pop_back();
        }
    }

public:
    /**
     * @param words  words that must all occur as substrings of the result
     *               (not modified).
     * @return one shortest string containing every word; the empty string when
     *         `words` is empty. When several shortest strings exist, any one
     *         of them may be returned.
     *
     * The memo table holds N * 2^N ints, so this is meant for small N (the
     * problem statement above bounds it by 12).
     */
    std::string shortestSuperstring(std::vector<std::string>& words) {
        pool_ = dropContained(words);
        if (pool_.empty()) return std::string();
        if (pool_.size() == 1) return pool_.front();

        n_ = static_cast<int>(pool_.size());
        full_ = (1 << n_) - 1;

        cost_.assign(n_, std::vector<int>(n_, 0));
        for (int a = 0; a < n_; ++a) {
            for (int b = 0; b < n_; ++b) {
                if (a != b) cost_[a][b] = appendCost(pool_[a], pool_[b]);
            }
        }

        // Reset all search state so the same object can be reused across calls.
        seen_.assign(n_, std::vector<int>(full_ + 1, INT_MAX));
        bestLen_ = INT_MAX;
        bestRoute_.clear();

        // Try every word as the first one. The incumbent and the memo table are
        // shared between starts, which only strengthens the pruning.
        for (int start = 0; start < n_; ++start) {
            route_.assign(1, start);
            explore(1 << start, start, static_cast<int>(pool_[start].size()));
        }

        // Emit the first word whole, then only the non-overlapping tail of each
        // following word.
        std::string answer = pool_[bestRoute_[0]];
        for (std::size_t pos = 1; pos < bestRoute_.size(); ++pos) {
            const std::string& word = pool_[bestRoute_[pos]];
            const int added = cost_[bestRoute_[pos - 1]][bestRoute_[pos]];
            answer.append(word, word.size() - added, std::string::npos);
        }
        return answer;
    }
};
```